diff --git a/AGENTS.md b/AGENTS.md index c43b0a3..61f0bfd 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -28,7 +28,7 @@ Pre-flight order: `make check` then `make build`. | Test placement strategy | [architecture repo](https://github.com/openshift-hyperfleet/architecture/blob/main/hyperfleet/docs/e2e-testing/test-placement-strategy.md) — which layer a test belongs in (unit / integration / E2E) | | Test writing guide | `docs/development.md` | | Debugging | `docs/debugging.md` | -| Local kind setup | `docs/local-kind-setup.md` | +| Setup Guide | `docs/setup.md` | | Runbook | `docs/runbook.md` | | Contributing | `CONTRIBUTING.md` | | Test case templates | `test-design/templates/` | diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 630c333..b8dbe7f 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -54,16 +54,28 @@ hyperfleet-e2e/ │ ├── labels/ - Test label definitions │ └── logger/ - Structured logging (slog) ├── e2e/ - Test suites -│ ├── adapter/ - Adapter failover and Maestro tests +│ ├── adapter/ - Adapter lifecycle tests +│ ├── channel/ - Channel management tests │ ├── cluster/ - Cluster lifecycle tests -│ └── nodepool/ - NodePool management tests +│ ├── nodepool/ - NodePool management tests +│ └── version/ - Version management tests ├── testdata/ - Test payloads and fixtures +│ ├── adapter-configs/ - Adapter configuration files │ └── payloads/ │ ├── clusters/ - Cluster creation payloads -│ └── nodepools/- NodePool creation payloads +│ └── nodepools/ - NodePool creation payloads +├── test-design/ - Test design documentation +│ ├── templates/ - Test case templates +│ ├── testcases/ - Test case documents +│ └── user-journeys/ - User journey maps ├── configs/ - Configuration files │ └── config.yaml - Default configuration -└── docs/ - Documentation +├── docs/ - Documentation +├── env/ - Environment configuration files +├── hack/ - Build and development scripts +├── images/ - Container image definitions +├── openapi/ - OpenAPI spec and generation config +└── scripts/ 
- Utility scripts ``` ## Testing diff --git a/Dockerfile b/Dockerfile index 9aa27a7..be4ac01 100644 --- a/Dockerfile +++ b/Dockerfile @@ -62,9 +62,6 @@ COPY --from=builder /build/bin/hyperfleet-e2e /usr/local/bin/ # Copy test payloads and fixtures COPY --from=builder /build/testdata /e2e/testdata -# Copy deploy scripts -COPY --from=builder /build/deploy-scripts /e2e/deploy-scripts - # Copy env files COPY --from=builder /build/env /e2e/env diff --git a/Makefile b/Makefile index e109b26..5993620 100644 --- a/Makefile +++ b/Makefile @@ -137,20 +137,6 @@ verify: generate fmt-check vet ## Run all verification checks .PHONY: check check: verify lint test ## Run all checks (fmt, vet, lint, test) -##@ Local kind Development (see docs/local-kind-setup.md) - -.PHONY: local-up -local-up: ## Full local setup: kind cluster + deploy + port-forward - ./deploy-scripts/kind-local.sh up - -.PHONY: local-down -local-down: ## Remove all components from local kind cluster - ./deploy-scripts/kind-local.sh down - -.PHONY: local-rebuild -local-rebuild: ## Rebuild + restart. Usage: make local-rebuild C=hyperfleet-adapter - ./deploy-scripts/kind-local.sh rebuild $(if $(NO_CACHE),--no-cache) $(C) - ##@ Container Images .PHONY: image diff --git a/README.md b/README.md index 0215df0..bc77210 100644 --- a/README.md +++ b/README.md @@ -10,19 +10,8 @@ Black-box end-to-end testing for validating the HyperFleet cluster lifecycle man HyperFleet E2E is a Ginkgo-based testing framework that validates HyperFleet cluster lifecycle management through black-box tests. It creates ephemeral test clusters for each test, providing complete isolation and supporting parallel execution. 
## Quick Start - -```bash -# Clone and build -git clone https://github.com/openshift-hyperfleet/hyperfleet-e2e.git -cd hyperfleet-e2e -make build - -# Set API URL and run tests -export HYPERFLEET_API_URL=https://api.hyperfleet.example.com -./bin/hyperfleet-e2e test --label-filter=tier0 -``` - -**Done!** The framework created a cluster, validated adapters, and cleaned up resources. +- **[Setup Guide](docs/setup.md)** - Set up the environment to run E2E tests +- **[Getting Started](docs/getting-started.md)** - Getting started guide ## Running Tests @@ -79,15 +68,28 @@ hyperfleet-e2e/ │ ├── labels/ - Test label definitions │ └── logger/ - Structured logging (slog) ├── e2e/ - Test suites +│ ├── adapter/ - Adapter lifecycle tests +│ ├── channel/ - Channel management tests │ ├── cluster/ - Cluster lifecycle tests -│ └── nodepool/ - NodePool management tests +│ ├── nodepool/ - NodePool management tests +│ └── version/ - Version management tests ├── testdata/ - Test payloads and fixtures +│ ├── adapter-configs/ - Adapter configuration files │ └── payloads/ │ ├── clusters/ - Cluster creation payloads │ └── nodepools/ - NodePool creation payloads +├── test-design/ - Test design documentation +│ ├── templates/ - Test case templates +│ ├── testcases/ - Test case documents +│ └── user-journeys/ - User journey maps ├── configs/ - Configuration files │ └── config.yaml - Default configuration -└── docs/ - Documentation +├── docs/ - Documentation +├── env/ - Environment configuration files +├── hack/ - Build and development scripts +├── images/ - Container image definitions +├── openapi/ - OpenAPI spec and generation config +└── scripts/ - Utility scripts ``` ## Key Features @@ -102,8 +104,8 @@ hyperfleet-e2e/ ## Documentation - **[System Architecture](https://github.com/openshift-hyperfleet/architecture)** - Single source of truth for all HyperFleet architectural documentation -- **[Local kind Setup](docs/local-kind-setup.md)** - Run E2E tests locally with kind and RabbitMQ -- 
**[Runbook](docs/runbook.md)** - E2E test runbook for GKE clusters +- **[Setup Guide](docs/setup.md)** - Set up the environment to run E2E tests +- **[Runbook](docs/runbook.md)** - E2E test runbook - **[Getting Started](docs/getting-started.md)** - Run your first test in 10 minutes - **[Framework Architecture](docs/architecture.md)** - Understand the framework design - **[Development](docs/development.md)** - Write new tests diff --git a/deploy-scripts/.env.example b/deploy-scripts/.env.example deleted file mode 100644 index 3b0b1e1..0000000 --- a/deploy-scripts/.env.example +++ /dev/null @@ -1,142 +0,0 @@ -# ============================================================================ -# CLM Components Deployment Configuration -# ============================================================================ -# This file contains environment variables used by deploy-clm.sh -# Copy this file to .env and modify values as needed for your environment: -# cp .env.example .env - -# Kubernetes Configuration -# NAMESPACE must be unique to prevent GCP Pub/Sub topic/subscription collisions. -# GCP resources are named using the namespace (e.g., ${NAMESPACE}-clusters), -# so multiple users sharing the same namespace will interfere with each other's tests. 
-export NAMESPACE="${NAMESPACE:-hyperfleet-e2e-$(echo ${USER:-default} | tr '[:upper:]' '[:lower:]')}" - -# Provider Configuration -export GCP_PROJECT_ID="${GCP_PROJECT_ID:-hcm-hyperfleet}" - -# Image Registry Configuration -export IMAGE_REGISTRY="${IMAGE_REGISTRY:-registry.ci.openshift.org}" - -# API Component Configuration -export API_IMAGE_REPO="${API_IMAGE_REPO:-ci/hyperfleet-api}" -export API_IMAGE_TAG="${API_IMAGE_TAG:-latest}" -export API_SERVICE_TYPE="${API_SERVICE_TYPE:-LoadBalancer}" - -# ============================================================================ -# Adapter Deployment Configuration -# ============================================================================ -# REQUIRED: Specify which adapters to deploy (comma-separated list) -# These environment variables are REQUIRED - auto-discovery has been removed - -# Cluster-level adapters to deploy -# Available adapters: cl-namespace, cl-job, cl-deployment, cl-maestro -export CLUSTER_TIER0_ADAPTERS_DEPLOYMENT="${CLUSTER_TIER0_ADAPTERS_DEPLOYMENT:-cl-namespace,cl-job,cl-deployment,cl-maestro}" - -# NodePool-level adapters to deploy -# Available adapters: np-configmap -export NODEPOOL_TIER0_ADAPTERS_DEPLOYMENT="${NODEPOOL_TIER0_ADAPTERS_DEPLOYMENT:-np-configmap}" -# Base directory containing adapter test data folders -# Each adapter must have its own folder: ${ADAPTERS_FILE_DIR}// -# Note: Uses TESTDATA_DIR environment variable if not explicitly set -# ADAPTERS_FILE_DIR="${TESTDATA_DIR}/adapter-configs" - -# ============================================================================ -# API Adapter Configuration -# ============================================================================ -# NOTE: These are SEPARATE from tier0 deployment configuration above -# These should be set based on specific test case requirements -# Set per test case as needed - -# Adapters for API cluster configuration -export API_ADAPTERS_CLUSTER="${API_ADAPTERS_CLUSTER:-cl-namespace,cl-job,cl-deployment,cl-maestro}" - 
-# Adapters for API nodepool configuration -export API_ADAPTERS_NODEPOOL="${API_ADAPTERS_NODEPOOL:-np-configmap}" - -# Sentinel Component Configuration -export SENTINEL_IMAGE_REPO="${SENTINEL_IMAGE_REPO:-ci/hyperfleet-sentinel}" -export SENTINEL_IMAGE_TAG="${SENTINEL_IMAGE_TAG:-latest}" -export SENTINEL_BROKER_TYPE="${SENTINEL_BROKER_TYPE:-googlepubsub}" -export SENTINEL_GOOGLEPUBSUB_CREATE_TOPIC_IF_MISSING="${SENTINEL_GOOGLEPUBSUB_CREATE_TOPIC_IF_MISSING:-true}" - -# Adapter Component Configuration -export ADAPTER_IMAGE_REPO="${ADAPTER_IMAGE_REPO:-ci/hyperfleet-adapter}" -export ADAPTER_IMAGE_TAG="${ADAPTER_IMAGE_TAG:-latest}" -export ADAPTER_GOOGLEPUBSUB_CREATE_TOPIC_IF_MISSING="${ADAPTER_GOOGLEPUBSUB_CREATE_TOPIC_IF_MISSING:-true}" -export ADAPTER_GOOGLEPUBSUB_CREATE_SUBSCRIPTION_IF_MISSING="${ADAPTER_GOOGLEPUBSUB_CREATE_SUBSCRIPTION_IF_MISSING:-true}" - -# Adapter Pub/Sub Configuration (Optional) -# If not set, these will be auto-generated based on namespace and resource type: -# ADAPTER_SUBSCRIPTION_ID: ${NAMESPACE}-${resource_type}-${adapter_name} -# ADAPTER_TOPIC: ${NAMESPACE}-${resource_type} -# ADAPTER_DEAD_LETTER_TOPIC: ${NAMESPACE}-${resource_type}-dlq - - -# HyperFleet API Configuration -# Note: If namespace is different, use: http://hyperfleet-api.${NAMESPACE}.svc.cluster.local:8000 -export API_BASE_URL="${API_BASE_URL:-http://hyperfleet-api:8000}" - - -# Helm Chart Sources -# Note: ADAPTER_CHART_* and API_CHART_* vars are also required by tier2 E2E tests -# (e.g., crash recovery) which deploy dedicated adapters and upgrade API config at runtime. -# When running tier2 tests in CI, ensure these are exported alongside GINKGO_LABEL_FILTER=tier2. 
-export API_CHART_REPO="${API_CHART_REPO:-https://github.com/openshift-hyperfleet/hyperfleet-api.git}" -export API_CHART_REF="${API_CHART_REF:-main}" -export API_CHART_PATH="${API_CHART_PATH:-charts}" - -export SENTINEL_CHART_REPO="${SENTINEL_CHART_REPO:-https://github.com/openshift-hyperfleet/hyperfleet-sentinel.git}" -export SENTINEL_CHART_REF="${SENTINEL_CHART_REF:-main}" -export SENTINEL_CHART_PATH="${SENTINEL_CHART_PATH:-charts}" - -export ADAPTER_CHART_REPO="${ADAPTER_CHART_REPO:-https://github.com/openshift-hyperfleet/hyperfleet-adapter.git}" -export ADAPTER_CHART_REF="${ADAPTER_CHART_REF:-main}" -export ADAPTER_CHART_PATH="${ADAPTER_CHART_PATH:-charts}" - -# Component Selection Flags (true/false) -export INSTALL_API="${INSTALL_API:-true}" -export INSTALL_SENTINEL="${INSTALL_SENTINEL:-true}" -export INSTALL_ADAPTER="${INSTALL_ADAPTER:-true}" - -# Uninstall Options (only used when --action uninstall) -# DELETE_K8S_RESOURCES: Delete Kubernetes resources (Helm releases + namespace) -export DELETE_K8S_RESOURCES="${DELETE_K8S_RESOURCES:-false}" - -# DELETE_CLOUD_RESOURCES: Delete GCP Pub/Sub topics and subscriptions -export DELETE_CLOUD_RESOURCES="${DELETE_CLOUD_RESOURCES:-false}" - -# DELETE_ALL: Delete everything (k8s + cloud resources) - overrides individual flags -export DELETE_ALL="${DELETE_ALL:-false}" - -# Execution Options -export DRY_RUN="${DRY_RUN:-false}" -export VERBOSE="${VERBOSE:-false}" - -# ============================================================================ -# Local kind Development (used by kind-local.sh, see docs/local-kind-setup.md) -# ============================================================================ -# Uncomment and modify for local kind setup. Not used by deploy-clm.sh or CI. - -# Parent directory containing component repos (hyperfleet-api, hyperfleet-sentinel, etc.) 
-# export PROJECTS_DIR="${HOME}/projects" - -# Path to hyperfleet-infra repo (for RabbitMQ + Maestro install) -# export INFRA_DIR="${HOME}/projects/hyperfleet-infra" - -# Kind cluster name (kubectl context becomes kind-) -# export KIND_CLUSTER="kind" - -# Kubernetes namespace for local deployment (overrides NAMESPACE above) -# export NAMESPACE="hyperfleet-local" - -# Maestro configuration -# export MAESTRO_NS="maestro" -# export MAESTRO_CONSUMER="cluster1" -# export MAESTRO_LOCAL_PORT="8100" - -# RabbitMQ connection URL (in-cluster service name) -# export RABBITMQ_URL="amqp://guest:guest@rabbitmq:5672" - -# E2E test URLs (set these so 'make e2e' works without extra env vars) -# export HYPERFLEET_API_URL="http://localhost:8000" -# export MAESTRO_URL="http://localhost:8100" diff --git a/deploy-scripts/README.md b/deploy-scripts/README.md deleted file mode 100644 index 7cadd4e..0000000 --- a/deploy-scripts/README.md +++ /dev/null @@ -1,342 +0,0 @@ -# CLM Deployment Scripts - -Automated deployment scripts for HyperFleet CLM (Cluster Lifecycle Management) components. - -## Overview - -The `deploy-clm.sh` script automates the installation and uninstallation of HyperFleet CLM components (API, Sentinel, and Adapters) using Helm for E2E testing environments. It provides a consistent and repeatable deployment process with comprehensive validation and error handling. - -## Features - -- **Install/Uninstall Operations**: Deploy or remove all CLM components with a single command -- **Configurable Components**: Install all components or selectively skip specific ones -- **Image Customization**: Configure custom image repositories and tags for each component -- **Helm Chart Management**: Automatically clone and use Helm charts from component repositories -- **Pod Health Verification**: Automatic verification of pod health after deployment with failure detection (CrashLoopBackOff, ImagePullBackOff, etc.) 
-- **Namespace Lifecycle**: Automatic namespace creation and deletion -- **Infrastructure Validation**: Pre-deployment checks for cluster readiness -- **Dry-Run Support**: Test deployment without making changes -- **Verbose Logging**: Detailed logging for troubleshooting -- **Error Handling**: Comprehensive validation and graceful error handling with automatic log retrieval on failures - -## Prerequisites - -The script requires the following tools to be installed: - -- `kubectl` - Kubernetes command-line tool -- `helm` - Helm package manager (v3+) -- `git` - Git version control - -Ensure you have: -- Valid kubeconfig with access to target cluster -- Appropriate RBAC permissions for namespace and resource management -- Network access to component Git repositories and image registries - -## Quick Start - -### Option 1: Using Command-Line Flags (Simple) - -Install all components with default settings: - -```bash -./deploy-scripts/deploy-clm.sh --action install --namespace -``` - -Install with custom image tags: - -```bash -./deploy-scripts/deploy-clm.sh --action install \ - --namespace \ - --api-image-tag v1.2.0 \ - --sentinel-image-tag v1.2.0 \ - --adapter-image-tag v1.2.0 -``` - -Uninstall all components: - -```bash -./deploy-scripts/deploy-clm.sh --action uninstall --namespace -``` - -### Option 2: Using .env File (Recommended for Complex Configurations) - -For easier management of deployment parameters, use a `.env` file: - -1. **Copy the example configuration:** - ```bash - cd deploy-scripts/ - cp .env.example .env - ``` - -2. 
**Edit `.env` with your settings:** - ```bash - vim .env # or your preferred editor - ``` - - Key parameters you can configure: - - `NAMESPACE` - Kubernetes namespace (default: `hyperfleet-e2e-$USER`) - - `IMAGE_REGISTRY` - Container image registry - - `API_IMAGE_TAG`, `SENTINEL_IMAGE_TAG`, `ADAPTER_IMAGE_TAG` - Image tags - - `GCP_PROJECT_ID` - Google Cloud Project ID for Pub/Sub - - `INSTALL_API`, `INSTALL_SENTINEL`, `INSTALL_ADAPTER` - Component selection - - See [.env.example](.env.example) for all available parameters. - -3. **Run the deployment:** - ```bash - ./deploy-clm.sh --action install - ``` - -**Configuration Priority:** -- Command-line flags override .env file values -- .env file values override script defaults -- This allows baseline config in `.env` with per-run overrides via flags - -## Command-Line Reference - -For basic usage, see [Quick Start](#quick-start) section above. - -### Basic Syntax - -```bash -./deploy-scripts/deploy-clm.sh --action [OPTIONS] -``` - -### Required Flags - -| Flag | Description | -|------|-------------| -| `--action ` | Action to perform: `install` or `uninstall` | - -### Optional Flags - -#### General Options - -| Flag | Description | Default | -|------|-------------|---------| -| `--namespace ` | Kubernetes namespace for deployment | `hyperfleet-e2e-$USER` | -| `--dry-run` | Print commands without executing | `false` | -| `--verbose` | Enable verbose logging | `false` | -| `--help` | Show help message | - | - -#### Component Selection - -| Flag | Description | -|------|-------------| -| `--skip-api` | Skip API component installation | -| `--skip-sentinel` | Skip Sentinel component installation | -| `--skip-adapter` | Skip Adapter component installation | - -#### Image Configuration - -| Flag | Description | Default | -|------|-------------|---------| -| `--image-registry ` | Image registry for all components | `registry.ci.openshift.org/ci` | -| `--api-image-repo ` | API image repository (without registry) | 
`hyperfleet-api` | -| `--api-image-tag ` | API image tag | `latest` | -| `--sentinel-image-repo ` | Sentinel image repository (without registry) | `hyperfleet-sentinel` | -| `--sentinel-image-tag ` | Sentinel image tag | `latest` | -| `--adapter-image-repo ` | Adapter image repository (without registry) | `hyperfleet-adapter` | -| `--adapter-image-tag ` | Adapter image tag | `latest` | - -**Notes**: -- Helm chart sources are fixed and pulled from the official component repositories at the `main` branch -- Final image path format: `${IMAGE_REGISTRY}/${IMAGE_REPO}:${IMAGE_TAG}` -- Example: `registry.ci.openshift.org/ci/hyperfleet-api:latest` - -## Examples - -### Installation Examples - -#### 1. Install with Default Settings - -```bash -./deploy-scripts/deploy-clm.sh --action install --namespace -``` - -This installs all three components (API, Sentinel, Adapter) with default configurations. - -#### 2. Install Only API and Sentinel - -```bash -./deploy-scripts/deploy-clm.sh --action install \ - --namespace \ - --skip-adapter -``` - -#### 3. Install with Custom Image Tags - -```bash -./deploy-scripts/deploy-clm.sh --action install \ - --namespace \ - --api-image-tag v1.2.0 \ - --sentinel-image-tag v1.2.0 \ - --adapter-image-tag v1.2.0 -``` - -#### 4. Install with Custom Image Repository - -```bash -./deploy-scripts/deploy-clm.sh --action install \ - --namespace \ - --api-image-repo myregistry.io/hyperfleet-api \ - --api-image-tag pr-123 -``` - -#### 5. Dry-Run Installation (No Changes) - -```bash -./deploy-scripts/deploy-clm.sh --action install \ - --namespace \ - --dry-run \ - --verbose -``` - -This simulates the installation without making any actual changes. - -### Uninstallation Examples - -#### 1. Uninstall All Components - -```bash -./deploy-scripts/deploy-clm.sh --action uninstall --namespace -``` - -This removes all Helm releases. - -#### 2. 
Dry-Run Uninstallation - -```bash -./deploy-scripts/deploy-clm.sh --action uninstall \ - --namespace \ - --dry-run \ - --verbose -``` - -#### 3. Uninstall Specific Components Only - -```bash -./deploy-scripts/deploy-clm.sh --action uninstall \ - --namespace \ - --skip-api \ - --skip-sentinel -``` - -This only uninstalls the Adapter component. - -## Script Workflow - -### Installation Flow - -1. **Dependency Checks**: Validates that `kubectl`, `helm`, and `git` are available -2. **Context Validation**: Verifies kubectl context and cluster connectivity -3. **Chart Cloning**: Clones Helm charts from Git repositories -4. **Component Installation**: Installs components in order (API → Sentinel → Adapter) using `helm upgrade --install` with `--create-namespace` -5. **Pod Health Verification**: Verifies all pods are running and healthy (detects CrashLoopBackOff, ImagePullBackOff, etc.) -6. **Status Reporting**: Displays deployment status and usage instructions - -If any component fails health verification, the script automatically retrieves pod logs for troubleshooting and exits with an error status. - -### Uninstallation Flow - -1. **Dependency Checks**: Validates required tools -2. **Context Validation**: Verifies kubectl context -3. **User Confirmation**: Prompts for confirmation (unless `--dry-run`) -4. **Component Removal**: Uninstalls Helm releases in reverse order (Adapter → Sentinel → API) - this automatically removes all resources -5. **Cleanup**: Removes temporary working directories - -## Namespace Management - -The script leverages Helm's built-in namespace management: - -- **Installation**: Namespace is automatically created by Helm using the `--create-namespace` flag -- **Uninstallation**: Resources are removed by `helm uninstall`, but the namespace is **not deleted** -- **Uniqueness**: Each deployment requires a unique namespace to prevent GCP Pub/Sub resource collisions. 
- -If you want to completely remove the namespace after uninstallation: - -```bash -# Uninstall components -./deploy-scripts/deploy-clm.sh --action uninstall --namespace - -# Manually delete namespace if desired -kubectl delete namespace -``` - -This design allows you to: -- Reuse the same namespace for multiple install/uninstall cycles -- Keep other resources in the namespace that aren't managed by Helm -- Manually inspect resources after uninstallation for debugging - -## Troubleshooting - -### Debugging - -Use `--dry-run --verbose` flags to see what the script would do without making changes: - -```bash -./deploy-scripts/deploy-clm.sh --action install \ - --namespace \ - --dry-run \ - --verbose -``` - -Check Helm deployment status: - -```bash -helm list -n -kubectl get pods -n -kubectl logs -n -``` - -View script execution with bash trace: - -```bash -bash -x deploy-scripts/deploy-clm.sh --action install --namespace -``` - -## Integration with E2E Tests - -### Pre-Test Setup - -Before running E2E tests, deploy the CLM components: - -```bash -# Deploy test environment -./deploy-scripts/deploy-clm.sh --action install --namespace - -# Configure E2E test API URL -EXTERNAL_IP=$(kubectl get svc hyperfleet-api -n $NAMESPACE -o jsonpath='{.status.loadBalancer.ingress[0].ip}') -export HYPERFLEET_API_URL="http://${EXTERNAL_IP}:8000" - -# Run E2E tests -./bin/hyperfleet-e2e test --label-filter=tier0 -``` - -### Post-Test Cleanup - -After tests complete: - -```bash -./deploy-scripts/deploy-clm.sh --action uninstall --namespace -``` - -## Script Output - -The script provides structured log output with the following levels: - -- **[INFO]**: Informational messages -- **[SUCCESS]**: Successful operations -- **[WARNING]**: Warnings (non-critical) -- **[ERROR]**: Errors (critical failures) -- **[VERBOSE]**: Detailed debug information (when `--verbose` is enabled) - -## Best Practices - -1. **Use Dry-Run First**: Always test with `--dry-run` before actual deployment -2. 
**Namespace Isolation**: Use dedicated namespaces for different test environments -3. **Tag Specificity**: Use specific image tags instead of `latest` for reproducible deployments -4. **Cleanup**: Always cleanup test environments after use to save resources -5. **Verbose Logging**: Use `--verbose` when troubleshooting issues -6. **Version Alignment**: Deploy matching versions of all components together - diff --git a/deploy-scripts/deploy-clm.sh b/deploy-scripts/deploy-clm.sh deleted file mode 100755 index c338447..0000000 --- a/deploy-scripts/deploy-clm.sh +++ /dev/null @@ -1,602 +0,0 @@ -#!/usr/bin/env bash - -# deploy-clm.sh - Automated CLM Components Deployment Script -# -# This script automates the installation and uninstallation of HyperFleet CLM components -# (API, Sentinel, and Adapters) using Helm for E2E testing environments. -# -# Usage: -# ./deploy-clm.sh --action install -# ./deploy-clm.sh --action uninstall --dry-run --namespace -# -# Set NAMESPACE in .env file or use --namespace flag. -# Each deployment requires a unique namespace to prevent GCP Pub/Sub resource collisions. - -set -euo pipefail - -# ============================================================================ -# Working Directories (must be set before loading .env) -# ============================================================================ -SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" -PROJECT_ROOT="$(cd "${SCRIPT_DIR}/.." 
&& pwd)" -WORK_DIR="${PROJECT_ROOT}/.deploy-work" -TESTDATA_DIR="${TESTDATA_DIR:-${PROJECT_ROOT}/testdata}" - -# ============================================================================ -# Load Environment Variables from .env file -# ============================================================================ -ENV_FILE="${SCRIPT_DIR}/.env" - -if [[ -f "${ENV_FILE}" ]]; then - set -a # automatically export all variables - source "${ENV_FILE}" - set +a -else - echo "[WARNING] .env file not found at ${ENV_FILE}" - echo "[WARNING] Using default configuration values" -fi - -# ============================================================================ -# Default Configuration (fallback if .env is not loaded) -# ============================================================================ - -ACTION="${ACTION:-}" -NAMESPACE="${NAMESPACE:-}" -DRY_RUN="${DRY_RUN:-false}" -VERBOSE="${VERBOSE:-false}" - -# Image Registry -IMAGE_REGISTRY="${IMAGE_REGISTRY:-registry.ci.openshift.org}" -IMAGE_PULL_POLICY="${IMAGE_PULL_POLICY:-Always}" - -# Provider Configuration -GCP_PROJECT_ID="${GCP_PROJECT_ID:-hcm-hyperfleet}" - -# API Component -API_IMAGE_REPO="${API_IMAGE_REPO:-ci/hyperfleet-api}" -API_IMAGE_TAG="${API_IMAGE_TAG:-latest}" -API_SERVICE_TYPE="${API_SERVICE_TYPE:-LoadBalancer}" -API_ADAPTERS_CLUSTER="${API_ADAPTERS_CLUSTER:-}" -API_ADAPTERS_NODEPOOL="${API_ADAPTERS_NODEPOOL:-}" - -# Adapter Test Data Configuration -ADAPTERS_FILE_DIR="${ADAPTERS_FILE_DIR:-${TESTDATA_DIR}/adapter-configs}" -CLUSTER_TIER0_ADAPTERS_DEPLOYMENT="${CLUSTER_TIER0_ADAPTERS_DEPLOYMENT:-}" -NODEPOOL_TIER0_ADAPTERS_DEPLOYMENT="${NODEPOOL_TIER0_ADAPTERS_DEPLOYMENT:-}" - -# Sentinel Component -SENTINEL_IMAGE_REPO="${SENTINEL_IMAGE_REPO:-ci/hyperfleet-sentinel}" -SENTINEL_IMAGE_TAG="${SENTINEL_IMAGE_TAG:-latest}" -SENTINEL_BROKER_TYPE="${SENTINEL_BROKER_TYPE:-googlepubsub}" -SENTINEL_GOOGLEPUBSUB_CREATE_TOPIC_IF_MISSING="${SENTINEL_GOOGLEPUBSUB_CREATE_TOPIC_IF_MISSING:-true}" 
-SENTINEL_BROKER_RABBITMQ_URL="${SENTINEL_BROKER_RABBITMQ_URL:-}" - -# Adapter Component -ADAPTER_IMAGE_REPO="${ADAPTER_IMAGE_REPO:-ci/hyperfleet-adapter}" -ADAPTER_IMAGE_TAG="${ADAPTER_IMAGE_TAG:-latest}" -ADAPTER_BROKER_TYPE="${ADAPTER_BROKER_TYPE:-googlepubsub}" -ADAPTER_GOOGLEPUBSUB_CREATE_TOPIC_IF_MISSING="${ADAPTER_GOOGLEPUBSUB_CREATE_TOPIC_IF_MISSING:-true}" -ADAPTER_GOOGLEPUBSUB_CREATE_SUBSCRIPTION_IF_MISSING="${ADAPTER_GOOGLEPUBSUB_CREATE_SUBSCRIPTION_IF_MISSING:-true}" -ADAPTER_BROKER_RABBITMQ_URL="${ADAPTER_BROKER_RABBITMQ_URL:-}" - -# HyperFleet API Configuration -API_BASE_URL="${API_BASE_URL:-http://hyperfleet-api:8000}" - -# Helm Chart Sources -API_CHART_REPO="${API_CHART_REPO:-https://github.com/openshift-hyperfleet/hyperfleet-api.git}" -API_CHART_REF="${API_CHART_REF:-main}" -API_CHART_PATH="${API_CHART_PATH:-charts}" - -SENTINEL_CHART_REPO="${SENTINEL_CHART_REPO:-https://github.com/openshift-hyperfleet/hyperfleet-sentinel.git}" -SENTINEL_CHART_REF="${SENTINEL_CHART_REF:-main}" -SENTINEL_CHART_PATH="${SENTINEL_CHART_PATH:-charts}" - -ADAPTER_CHART_REPO="${ADAPTER_CHART_REPO:-https://github.com/openshift-hyperfleet/hyperfleet-adapter.git}" -ADAPTER_CHART_REF="${ADAPTER_CHART_REF:-main}" -ADAPTER_CHART_PATH="${ADAPTER_CHART_PATH:-charts}" - -# Component flags -INSTALL_API="${INSTALL_API:-true}" -INSTALL_SENTINEL="${INSTALL_SENTINEL:-true}" -INSTALL_ADAPTER="${INSTALL_ADAPTER:-true}" - -# Uninstall options -DELETE_K8S_RESOURCES="${DELETE_K8S_RESOURCES:-false}" -DELETE_CLOUD_RESOURCES="${DELETE_CLOUD_RESOURCES:-false}" -DELETE_ALL="${DELETE_ALL:-false}" - -# Debug logging -DEBUG_LOG_DIR="${DEBUG_LOG_DIR:-${PROJECT_ROOT}/.debug-work}" - -# ============================================================================ -# Load Library Modules -# ============================================================================ - -source "${SCRIPT_DIR}/lib/common.sh" -source "${SCRIPT_DIR}/lib/helm.sh" -source "${SCRIPT_DIR}/lib/api.sh" -source 
"${SCRIPT_DIR}/lib/sentinel.sh" -source "${SCRIPT_DIR}/lib/adapter.sh" -source "${SCRIPT_DIR}/lib/gcp.sh" - -# ============================================================================ -# Usage and Argument Parsing -# ============================================================================ - -print_usage() { - cat << EOF -Usage: ${0##*/} --action [OPTIONS] - -Automated deployment script for HyperFleet CLM components (API, Sentinel, Adapter) - -CONFIGURATION: - This script loads configuration from ${SCRIPT_DIR}/.env file. - Command-line flags override .env values. - - NAMESPACE should be unique to prevent GCP Pub/Sub collisions. - Recommended: set NAMESPACE=hyperfleet-e2e-\$USER in .env file. - -REQUIRED FLAGS: - --action Action to perform: install or uninstall - -OPTIONAL FLAGS: - --namespace Kubernetes namespace (default from .env: ${NAMESPACE}) - - # Component Selection - --skip-api Skip API installation - --skip-sentinel Skip Sentinel installation - --skip-adapter Skip Adapter installation - - # Image Configuration - --image-registry Image registry (default: ${IMAGE_REGISTRY}) - --api-image-repo API image repository (default: ${API_IMAGE_REPO}) - --api-image-tag API image tag (default: ${API_IMAGE_TAG}) - --sentinel-image-repo Sentinel image repository (default: ${SENTINEL_IMAGE_REPO}) - --sentinel-image-tag Sentinel image tag (default: ${SENTINEL_IMAGE_TAG}) - --adapter-image-repo Adapter image repository (default: ${ADAPTER_IMAGE_REPO}) - --adapter-image-tag Adapter image tag (default: ${ADAPTER_IMAGE_TAG}) - - # API Configuration - --api-base-url HyperFleet API base URL for Sentinel and Adapter - (default: http://hyperfleet-api..svc.cluster.local:8000) - --api-adapters-cluster Comma-separated list of cluster adapters for API config (e.g., "cl-namespace,cl-job") - --api-adapters-nodepool Comma-separated list of nodepool adapters for API config (e.g., "np-configmap") - - # Adapter Deployment Configuration - --cluster-tier0-adapters Comma-separated list 
of cluster-level adapters to deploy (e.g., "cl-namespace,cl-job") - --nodepool-tier0-adapters Comma-separated list of nodepool-level adapters to deploy (e.g., "np-configmap") - --adapters-file-dir Base directory containing adapter test data folders (default: ${TESTDATA_DIR}/adapter-configs) - - # Uninstall Options (only for --action uninstall) - --delete-k8s-resources Delete Kubernetes resources (Helm releases + namespace) - --delete-cloud-resources Delete GCP Pub/Sub topics and subscriptions - --all Delete everything (k8s resources + cloud resources) - - # Execution Options - --dry-run Print commands without executing - --verbose Enable verbose logging - --debug-log-dir Directory to save debug logs on deployment failures - (default: ${WORK_DIR}/debug-logs) - --help Show this help message - -ENVIRONMENT VARIABLES: - All configuration can be set in the .env file located at: ${SCRIPT_DIR}/.env - - Common variables: - - NAMESPACE Kubernetes namespace - - IMAGE_REGISTRY Container image registry - - API_IMAGE_TAG API image tag - - SENTINEL_IMAGE_TAG Sentinel image tag - - ADAPTER_IMAGE_TAG Adapter image tag - - GCP_PROJECT_ID Google Cloud Project ID for Pub/Sub - - TESTDATA_DIR Base directory for test data (default: PROJECT_ROOT/testdata) - - CLUSTER_TIER0_ADAPTERS_DEPLOYMENT Cluster-level adapters to deploy (comma-separated) - - NODEPOOL_TIER0_ADAPTERS_DEPLOYMENT NodePool-level adapters to deploy (comma-separated) - - ADAPTERS_FILE_DIR Base directory for adapter test data (default: TESTDATA_DIR/adapter-configs) - - API_ADAPTERS_CLUSTER Adapters for API cluster config (set per test case) - - API_ADAPTERS_NODEPOOL Adapters for API nodepool config (set per test case) - - RabbitMQ broker (must be provisioned externally before running this script): - - SENTINEL_BROKER_TYPE=rabbitmq Use RabbitMQ instead of Google Pub/Sub for Sentinel - - SENTINEL_BROKER_RABBITMQ_URL RabbitMQ AMQP URL for Sentinel (e.g., amqp://user:pass@host:5672/) - - ADAPTER_BROKER_TYPE=rabbitmq Use 
RabbitMQ instead of Google Pub/Sub for Adapters - - ADAPTER_BROKER_RABBITMQ_URL RabbitMQ AMQP URL for Adapters - -EXAMPLES: - # Install with .env defaults - ${0##*/} --action install - - # Install with explicit namespace - ${0##*/} --action install --namespace - - # Install with custom image tags - ${0##*/} --action install \\ - --namespace \\ - --api-image-tag v1.0.0 \\ - --sentinel-image-tag v1.0.0 \\ - --adapter-image-tag v1.0.0 - - # Install only API and Sentinel - ${0##*/} --action install --namespace --skip-adapter - - # Dry-run to preview actions - ${0##*/} --action uninstall --namespace --dry-run --verbose - - # Delete Kubernetes resources - ${0##*/} --action uninstall --namespace --delete-k8s-resources - - # Delete GCP Pub/Sub resources - ${0##*/} --action uninstall --namespace --delete-cloud-resources - - # Complete cleanup: delete everything - ${0##*/} --action uninstall --namespace --all - - # Install with custom image repository - ${0##*/} --action install \\ - --namespace \\ - --api-image-repo myregistry.io/hyperfleet-api \\ - --api-image-tag dev-123 - -EOF -} - -parse_arguments() { - if [[ $# -eq 0 ]]; then - print_usage - exit 1 - fi - - while [[ $# -gt 0 ]]; do - case "$1" in - --action) - ACTION="$2" - shift 2 - ;; - --namespace) - NAMESPACE="$2" - shift 2 - ;; - --skip-api) - INSTALL_API=false - shift - ;; - --skip-sentinel) - INSTALL_SENTINEL=false - shift - ;; - --skip-adapter) - INSTALL_ADAPTER=false - shift - ;; - --image-registry) - IMAGE_REGISTRY="$2" - shift 2 - ;; - --api-image-repo) - API_IMAGE_REPO="$2" - shift 2 - ;; - --api-image-tag) - API_IMAGE_TAG="$2" - shift 2 - ;; - --sentinel-image-repo) - SENTINEL_IMAGE_REPO="$2" - shift 2 - ;; - --sentinel-image-tag) - SENTINEL_IMAGE_TAG="$2" - shift 2 - ;; - --adapter-image-repo) - ADAPTER_IMAGE_REPO="$2" - shift 2 - ;; - --adapter-image-tag) - ADAPTER_IMAGE_TAG="$2" - shift 2 - ;; - --api-base-url) - API_BASE_URL="$2" - shift 2 - ;; - --api-adapters-cluster) - API_ADAPTERS_CLUSTER="$2" - 
shift 2 - ;; - --api-adapters-nodepool) - API_ADAPTERS_NODEPOOL="$2" - shift 2 - ;; - --cluster-tier0-adapters) - CLUSTER_TIER0_ADAPTERS_DEPLOYMENT="$2" - shift 2 - ;; - --nodepool-tier0-adapters) - NODEPOOL_TIER0_ADAPTERS_DEPLOYMENT="$2" - shift 2 - ;; - --adapters-file-dir) - ADAPTERS_FILE_DIR="$2" - shift 2 - ;; - --delete-k8s-resources) - DELETE_K8S_RESOURCES=true - shift - ;; - --delete-cloud-resources) - DELETE_CLOUD_RESOURCES=true - shift - ;; - --all) - DELETE_ALL=true - DELETE_K8S_RESOURCES=true - DELETE_CLOUD_RESOURCES=true - shift - ;; - --dry-run) - DRY_RUN=true - shift - ;; - --verbose) - VERBOSE=true - shift - ;; - --debug-log-dir) - DEBUG_LOG_DIR="$2" - shift 2 - ;; - --help|-h) - print_usage - exit 0 - ;; - *) - log_error "Unknown option: $1" - echo - print_usage - exit 1 - ;; - esac - done - - # Validate required arguments - if [[ -z "${ACTION}" ]]; then - log_error "Missing required flag: --action" - echo - print_usage - exit 1 - fi - - # Validate NAMESPACE variable is set - if [[ -z "${NAMESPACE}" ]]; then - log_error "Missing required flag: --namespace or env variable NAMESPACE" - echo - print_usage - exit 1 - fi - - if (( ${#NAMESPACE} > 63 )) || [[ ! "${NAMESPACE}" =~ ^[a-z0-9]([-a-z0-9]*[a-z0-9])?$ ]]; then - log_error "Invalid namespace: '${NAMESPACE}'. Must match DNS-1123 label format and be <= 63 chars." - exit 1 - fi - - if [[ "${ACTION}" != "install" && "${ACTION}" != "uninstall" ]]; then - log_error "Invalid action: ${ACTION}. 
Must be 'install' or 'uninstall'" - exit 1 - fi - - # Validate at least one component is selected - if [[ "${INSTALL_API}" == "false" && "${INSTALL_SENTINEL}" == "false" && "${INSTALL_ADAPTER}" == "false" ]]; then - log_error "At least one component must be selected for installation" - exit 1 - fi -} - -# ============================================================================ -# Main Installation Flow -# ============================================================================ - -perform_install() { - log_section "Starting CLM Components Installation" - - # Validate environment - check_dependencies || exit 1 - validate_kubectl_context || exit 1 - - # Prepare working directory - log_section "Preparing Working Directory" - mkdir -p "${WORK_DIR}" - log_verbose "Work directory: ${WORK_DIR}" - - # Clone Helm charts - log_section "Cloning Helm Charts" - - if [[ "${INSTALL_API}" == "true" ]]; then - clone_helm_chart "api" "${API_CHART_REPO}" "${API_CHART_REF}" "${API_CHART_PATH}" || exit 1 - fi - - if [[ "${INSTALL_SENTINEL}" == "true" ]]; then - clone_helm_chart "sentinel" "${SENTINEL_CHART_REPO}" "${SENTINEL_CHART_REF}" "${SENTINEL_CHART_PATH}" || exit 1 - fi - - if [[ "${INSTALL_ADAPTER}" == "true" ]]; then - clone_helm_chart "adapter" "${ADAPTER_CHART_REPO}" "${ADAPTER_CHART_REF}" "${ADAPTER_CHART_PATH}" || exit 1 - fi - - # Install components in order: API -> Sentinel -> Adapter - if [[ "${INSTALL_API}" == "true" ]]; then - install_api || exit 1 - fi - - if [[ "${INSTALL_SENTINEL}" == "true" ]]; then - install_sentinel || exit 1 - fi - - if [[ "${INSTALL_ADAPTER}" == "true" ]]; then - install_adapters || { - log_error "Adapter installation failed" - log_section "Installation Failed" - exit 1 - } - fi - - # Final status - log_section "Installation Complete" - - if [[ "${DRY_RUN}" == "false" ]]; then - log_info "Deployed components:" - helm list -n "${NAMESPACE}" - - echo - log_info "Pod status:" - kubectl get pods -n "${NAMESPACE}" - - echo - log_success "All 
components installed successfully!" - log_info "Namespace: ${NAMESPACE}" - log_info "To view logs: kubectl logs -n ${NAMESPACE} -l app.kubernetes.io/name=" - - # Display API external IP if available - if [[ "${INSTALL_API}" == "true" ]]; then - local external_ip - external_ip=$(kubectl get svc "hyperfleet-api" -n "${NAMESPACE}" -o jsonpath='{.status.loadBalancer.ingress[0].ip}' 2>/dev/null) - if [[ -n "${external_ip}" ]]; then - echo - log_info "HyperFleet API External IP: ${external_ip}" - log_info "API URL: http://${external_ip}:8000" - fi - fi - else - log_info "[DRY-RUN] Installation simulation complete" - fi - - # Clean up work directory - if [[ "${DRY_RUN}" == "false" && "${VERBOSE}" == "false" ]]; then - log_verbose "Cleaning up work directory" - rm -rf "${WORK_DIR}" - fi -} - -# ============================================================================ -# Main Uninstallation Flow -# ============================================================================ - -perform_uninstall() { - log_section "Starting CLM Components Uninstallation" - - # Validate environment - check_dependencies || exit 1 - validate_kubectl_context || exit 1 - - # Display uninstall configuration - log_info "Uninstall Configuration:" - log_info " Delete K8s Resources (including namespace): ${DELETE_K8S_RESOURCES}" - log_info " Delete Cloud Resources: ${DELETE_CLOUD_RESOURCES}" - - local uninstall_errors=0 - - # Uninstall Kubernetes resources (in reverse order: Adapter -> Sentinel -> API) - if [[ "${DELETE_K8S_RESOURCES}" == "true" ]]; then - log_section "Uninstalling Kubernetes Resources" - - if [[ "${INSTALL_ADAPTER}" == "true" ]]; then - if ! uninstall_adapters; then - ((uninstall_errors++)) - fi - fi - - if [[ "${INSTALL_SENTINEL}" == "true" ]]; then - if ! uninstall_sentinel; then - ((uninstall_errors++)) - fi - fi - - if [[ "${INSTALL_API}" == "true" ]]; then - if ! 
uninstall_api; then - log_warning "Failed to uninstall API" - ((uninstall_errors++)) - fi - fi - - # Delete namespace (this will remove any remaining k8s resources) - if ! delete_namespace "${NAMESPACE}"; then - log_warning "Failed to delete namespace" - ((uninstall_errors++)) - fi - else - log_info "Skipping Kubernetes resource deletion (use --delete-k8s-resources to enable)" - fi - - # Delete GCP resources (topics and subscriptions) - if [[ "${DELETE_CLOUD_RESOURCES}" == "true" ]]; then - log_section "Deleting Cloud Provider Resources" - if ! cleanup_gcp_resources "${NAMESPACE}"; then - log_warning "Some GCP resources failed to delete" - ((uninstall_errors++)) - fi - else - log_info "Skipping cloud resource deletion (use --delete-cloud-resources to enable)" - fi - - # Final status - log_section "Uninstallation Complete" - - if [[ "${DRY_RUN}" == "false" ]]; then - # Show summary of what was deleted - echo - log_info "Summary:" - [[ "${DELETE_K8S_RESOURCES}" == "true" ]] && log_info " ✓ K8s resources and namespace" - [[ "${DELETE_CLOUD_RESOURCES}" == "true" ]] && log_info " ✓ Cloud resources" - - echo - if [[ ${uninstall_errors} -eq 0 ]]; then - log_success "Uninstallation completed successfully!" 
- else - log_error "Uninstallation completed with ${uninstall_errors} error(s)" - log_error "Please check the logs above for details" - exit 1 - fi - else - log_info "[DRY-RUN] Uninstallation simulation complete" - fi - - # Clean up work directory - if [[ -d "${WORK_DIR}" ]]; then - log_verbose "Cleaning up work directory" - rm -rf "${WORK_DIR}" - fi -} - -# ============================================================================ -# Main Entry Point -# ============================================================================ - -main() { - parse_arguments "$@" - - log_section "CLM Components Deployment Script" - log_info "Action: ${ACTION}" - log_info "Namespace: ${NAMESPACE}" - log_info "Dry-run: ${DRY_RUN}" - log_info "Verbose: ${VERBOSE}" - - if [[ "${VERBOSE}" == "true" ]]; then - echo - log_verbose "Component Configuration:" - log_verbose " API: ${INSTALL_API} (${API_IMAGE_REPO}:${API_IMAGE_TAG})" - log_verbose " Sentinel: ${INSTALL_SENTINEL} (${SENTINEL_IMAGE_REPO}:${SENTINEL_IMAGE_TAG})" - log_verbose " Adapter: ${INSTALL_ADAPTER} (${ADAPTER_IMAGE_REPO}:${ADAPTER_IMAGE_TAG})" - fi - - case "${ACTION}" in - install) - perform_install - ;; - uninstall) - perform_uninstall - ;; - esac -} - -# Run main function -main "$@" diff --git a/deploy-scripts/kind-build-images.sh b/deploy-scripts/kind-build-images.sh deleted file mode 100755 index 467c442..0000000 --- a/deploy-scripts/kind-build-images.sh +++ /dev/null @@ -1,92 +0,0 @@ -#!/usr/bin/env bash -# -# kind-build-images.sh — Build and load HyperFleet images into kind -# -# Builds component images from local repos under PROJECTS_DIR and loads -# them into the kind cluster. No args = build all. Named args = build -# only those (use full repo names). 
-# -# Usage: -# ./deploy-scripts/kind-build-images.sh # Build all -# ./deploy-scripts/kind-build-images.sh hyperfleet-adapter # Build one -# ./deploy-scripts/kind-build-images.sh --no-cache # Force rebuild -# -# Env vars: -# PROJECTS_DIR Parent dir containing component repos (default: ~/projects) -# KIND_CLUSTER Kind cluster name (default: kind) - -set -euo pipefail - -PROJECTS_DIR="${PROJECTS_DIR:-${HOME}/projects}" -CI_REGISTRY="registry.ci.openshift.org/ci" -KIND_CLUSTER="${KIND_CLUSTER:-kind}" -CONTAINER_TOOL="${CONTAINER_TOOL:-$(command -v podman 2>/dev/null || command -v docker 2>/dev/null || true)}" -if [[ -z "${CONTAINER_TOOL}" ]]; then - echo "[ERROR] No container tool found (podman or docker). Install one or set CONTAINER_TOOL." - exit 1 -fi -NO_CACHE="" - -# The three platform components — each maps 1:1 to a Docker image. -# Adapter configs in testdata/ all share the same hyperfleet-adapter image. -COMPONENTS=("hyperfleet-api" "hyperfleet-sentinel" "hyperfleet-adapter") - -# ============================================================================ -# Parse args -# ============================================================================ - -TARGETS=() - -while [[ $# -gt 0 ]]; do - case "$1" in - --no-cache) NO_CACHE="--no-cache"; shift ;; - -h|--help) - echo "Usage: $0 [--no-cache] [COMPONENT...]" - echo "" - echo "Builds and loads HyperFleet images into kind from local repos." - echo "No args = build all. Named args = build only those." 
- echo "" - echo "Components: ${COMPONENTS[*]}" - echo "" - echo "Env: PROJECTS_DIR=${PROJECTS_DIR} KIND_CLUSTER=${KIND_CLUSTER} CONTAINER_TOOL=${CONTAINER_TOOL}" - exit 0 - ;; - -*) echo "Unknown option: $1"; exit 1 ;; - *) TARGETS+=("$1"); shift ;; - esac -done - -# Default: build all components -if [[ ${#TARGETS[@]} -eq 0 ]]; then - TARGETS=("${COMPONENTS[@]}") -fi - -# ============================================================================ -# Build and load -# ============================================================================ - -echo "=== Building HyperFleet images (cluster: ${KIND_CLUSTER}) ===" - -for name in "${TARGETS[@]}"; do - dir="${PROJECTS_DIR}/${name}" - - if [[ ! -d "${dir}" ]]; then - echo "[ERROR] ${name} not found at ${dir}" - echo " Clone it: git clone https://github.com/openshift-hyperfleet/${name}.git ${dir}" - echo " Or set PROJECTS_DIR to the parent directory containing your repos." - exit 1 - fi - - echo "[BUILD] ${name}..." - "${CONTAINER_TOOL}" build ${NO_CACHE} -t "${CI_REGISTRY}/${name}:latest" "${dir}" - - echo "[LOAD] ${name} -> kind..." - if [[ "$(basename "${CONTAINER_TOOL}")" == "podman" ]]; then - "${CONTAINER_TOOL}" save "${CI_REGISTRY}/${name}:latest" | kind load image-archive /dev/stdin --name "${KIND_CLUSTER}" - else - kind load docker-image "${CI_REGISTRY}/${name}:latest" --name "${KIND_CLUSTER}" - fi - echo "" -done - -echo "=== Done ===" diff --git a/deploy-scripts/kind-local.sh b/deploy-scripts/kind-local.sh deleted file mode 100755 index d6ad913..0000000 --- a/deploy-scripts/kind-local.sh +++ /dev/null @@ -1,222 +0,0 @@ -#!/usr/bin/env bash -# -# kind-local.sh — Manage a local kind cluster for E2E testing -# -# Wraps kind, helm, and deploy-clm.sh into a single workflow. -# Sources deploy-scripts/.env for all config (copy from .env.example). 
-# -# Usage: -# ./deploy-scripts/kind-local.sh up # Full setup -# ./deploy-scripts/kind-local.sh setup # Cluster + RabbitMQ + Maestro + images -# ./deploy-scripts/kind-local.sh deploy # Deploy components -# ./deploy-scripts/kind-local.sh port-forward # Forward API + Maestro -# ./deploy-scripts/kind-local.sh rebuild [component] # Rebuild image + restart -# ./deploy-scripts/kind-local.sh rebuild --no-cache [comp] # Force rebuild -# ./deploy-scripts/kind-local.sh down # Tear down -# -# See docs/local-kind-setup.md for the full guide. - -set -euo pipefail - -SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" -PROJECT_ROOT="$(cd "${SCRIPT_DIR}/.." && pwd)" - -# ============================================================================ -# Configuration — sourced from .env, with local-only defaults below -# ============================================================================ - -# shellcheck source=.env -[[ -f "${SCRIPT_DIR}/.env" ]] && source "${SCRIPT_DIR}/.env" - -# Local-only defaults (not in .env unless user added them) -KIND_CLUSTER="${KIND_CLUSTER:-kind}" -KIND_CONTEXT="kind-${KIND_CLUSTER}" -INFRA_DIR="${INFRA_DIR:-${HOME}/projects/hyperfleet-infra}" -PROJECTS_DIR="${PROJECTS_DIR:-${HOME}/projects}" -MAESTRO_NS="${MAESTRO_NS:-maestro}" -MAESTRO_CONSUMER="${MAESTRO_CONSUMER:-cluster1}" -MAESTRO_LOCAL_PORT="${MAESTRO_LOCAL_PORT:-8100}" -RABBITMQ_URL="${RABBITMQ_URL:-amqp://guest:guest@rabbitmq:5672}" - -# Override .env defaults for local kind -NAMESPACE="${NAMESPACE:-hyperfleet-local}" -IMAGE_PULL_POLICY="IfNotPresent" -API_SERVICE_TYPE="ClusterIP" - -# Map .env adapter names -CLUSTER_ADAPTERS="${CLUSTER_TIER0_ADAPTERS_DEPLOYMENT:-cl-namespace,cl-job,cl-deployment,cl-maestro}" -NODEPOOL_ADAPTERS="${NODEPOOL_TIER0_ADAPTERS_DEPLOYMENT:-np-configmap}" - -# ============================================================================ -# Helpers -# ============================================================================ - -require_kind_context() { - 
if ! kubectl config get-contexts "${KIND_CONTEXT}" &>/dev/null; then - echo "ERROR: kind context ${KIND_CONTEXT} not found. Run: kind create cluster --name ${KIND_CLUSTER}" - exit 1 - fi - local current - current="$(kubectl config current-context 2>/dev/null || true)" - if [[ "${current}" != "${KIND_CONTEXT}" ]]; then - echo "Switching to kind context: ${KIND_CONTEXT}" - kubectl config use-context "${KIND_CONTEXT}" - fi -} - -kill_port_forwards() { - pkill -f "kubectl.*port-forward.*hyperfleet-api" 2>/dev/null || true - pkill -f "kubectl.*port-forward.*maestro" 2>/dev/null || true - sleep 1 -} - -# ============================================================================ -# Commands -# ============================================================================ - -cmd_setup() { - echo "=== Creating kind cluster ===" - kind get clusters 2>/dev/null | grep -q "^${KIND_CLUSTER}$" || kind create cluster --name "${KIND_CLUSTER}" - kubectl create namespace "${NAMESPACE}" --dry-run=client -o yaml | kubectl --context "${KIND_CONTEXT}" apply -f - - - echo "=== Installing RabbitMQ ===" - kubectl --context "${KIND_CONTEXT}" apply -f "${INFRA_DIR}/manifests/rabbitmq.yaml" --namespace "${NAMESPACE}" - echo "Waiting for RabbitMQ..." 
- local retries=60 - until kubectl --context "${KIND_CONTEXT}" get pod -l app=rabbitmq -n "${NAMESPACE}" --no-headers 2>/dev/null | grep -q .; do - ((retries--)) || { echo "ERROR: Timed out waiting for RabbitMQ pod"; exit 1; } - sleep 2 - done - kubectl --context "${KIND_CONTEXT}" wait --for=condition=ready pod -l app=rabbitmq --namespace "${NAMESPACE}" --timeout=120s - - echo "=== Installing Maestro ===" - make -C "${INFRA_DIR}" install-maestro NAMESPACE="${MAESTRO_NS}" KUBECONFIG="${HOME}/.kube/config" - make -C "${INFRA_DIR}" create-maestro-consumer MAESTRO_CONSUMER="${MAESTRO_CONSUMER}" NAMESPACE="${MAESTRO_NS}" KUBECONFIG="${HOME}/.kube/config" - - echo "=== Building images ===" - "${SCRIPT_DIR}/kind-build-images.sh" "$@" -} - -cmd_deploy() { - require_kind_context - - echo "=== Deploying API + Sentinels + Adapters ===" - SENTINEL_BROKER_RABBITMQ_URL="${RABBITMQ_URL}" \ - ADAPTER_BROKER_RABBITMQ_URL="${RABBITMQ_URL}" \ - ADAPTER_BROKER_TYPE=rabbitmq \ - SENTINEL_BROKER_TYPE=rabbitmq \ - IMAGE_PULL_POLICY="${IMAGE_PULL_POLICY}" \ - NAMESPACE="${NAMESPACE}" \ - API_SERVICE_TYPE="${API_SERVICE_TYPE}" \ - API_ADAPTERS_CLUSTER="${CLUSTER_ADAPTERS}" \ - API_ADAPTERS_NODEPOOL="${NODEPOOL_ADAPTERS}" \ - CLUSTER_TIER0_ADAPTERS_DEPLOYMENT="${CLUSTER_ADAPTERS}" \ - NODEPOOL_TIER0_ADAPTERS_DEPLOYMENT="${NODEPOOL_ADAPTERS}" \ - "${SCRIPT_DIR}/deploy-clm.sh" --action install -} - -cmd_down() { - require_kind_context - - kill_port_forwards - - NAMESPACE="${NAMESPACE}" \ - CLUSTER_TIER0_ADAPTERS_DEPLOYMENT="${CLUSTER_ADAPTERS}" \ - NODEPOOL_TIER0_ADAPTERS_DEPLOYMENT="${NODEPOOL_ADAPTERS}" \ - "${SCRIPT_DIR}/deploy-clm.sh" --action uninstall --delete-k8s-resources -} - -cmd_port_forward() { - kill_port_forwards - - kubectl --context "${KIND_CONTEXT}" port-forward -n "${NAMESPACE}" svc/hyperfleet-api 8000:8000 & - kubectl --context "${KIND_CONTEXT}" port-forward -n "${MAESTRO_NS}" svc/maestro "${MAESTRO_LOCAL_PORT}":8000 & - - local api_ready=false - for _ in $(seq 1 10); do - 
sleep 2 - if curl -sf http://localhost:8000/api/hyperfleet/v1/clusters > /dev/null 2>&1; then - api_ready=true - break - fi - done - if [[ "${api_ready}" == true ]]; then - echo "API ready at http://localhost:8000" - else - echo "ERROR: API not reachable at localhost:8000" - exit 1 - fi - if curl -sf "http://localhost:${MAESTRO_LOCAL_PORT}/api/maestro/v1/consumers" > /dev/null 2>&1; then - echo "Maestro ready at http://localhost:${MAESTRO_LOCAL_PORT}" - else - echo "WARNING: Maestro not reachable at localhost:${MAESTRO_LOCAL_PORT}" - fi -} - -# rebuild — Rebuild image(s), load into kind, restart affected deployments. -# Args forwarded to kind-build-images.sh (component names, --no-cache). -cmd_rebuild() { - require_kind_context - - "${SCRIPT_DIR}/kind-build-images.sh" "$@" - - # Figure out what to restart based on args (skip --no-cache flag) - local components=() - for arg in "$@"; do - [[ "${arg}" == --* ]] && continue - components+=("${arg}") - done - - if [[ ${#components[@]} -eq 0 ]]; then - echo "=== Restarting all deployments (excluding postgresql) ===" - local deploys - deploys=$(kubectl --context "${KIND_CONTEXT}" get deployments -n "${NAMESPACE}" -o name \ - | grep -v postgresql) - echo "${deploys}" | xargs kubectl --context "${KIND_CONTEXT}" rollout restart -n "${NAMESPACE}" - echo "${deploys}" | xargs -I{} kubectl --context "${KIND_CONTEXT}" rollout status {} -n "${NAMESPACE}" --timeout=120s - else - for comp in "${components[@]}"; do - echo "=== Restarting ${comp} ===" - kubectl --context "${KIND_CONTEXT}" rollout restart deployment \ - -n "${NAMESPACE}" -l "app.kubernetes.io/name=${comp},app.kubernetes.io/component!=postgresql" - kubectl --context "${KIND_CONTEXT}" rollout status deployment \ - -n "${NAMESPACE}" -l "app.kubernetes.io/name=${comp}" --timeout=120s - done - fi - - echo "=== Re-establishing port-forwards ===" - cmd_port_forward -} - -cmd_up() { - cmd_setup "$@" - cmd_deploy - cmd_port_forward -} - -# 
============================================================================ -# Entrypoint -# ============================================================================ - -case "${1:-}" in - up) shift; cmd_up "$@" ;; - setup) shift; cmd_setup "$@" ;; - deploy) cmd_deploy ;; - down|undeploy) cmd_down ;; - port-forward) cmd_port_forward ;; - rebuild) shift; cmd_rebuild "$@" ;; - *) - echo "Usage: $0 {up|setup|deploy|down|port-forward|rebuild}" - echo "" - echo " up [COMPONENTS...] Full setup from scratch" - echo " setup [COMPONENTS...] Cluster + RabbitMQ + Maestro + build images" - echo " deploy Deploy API + sentinels + adapters" - echo " down Remove all + kill port-forwards" - echo " port-forward Forward API (:8000) + Maestro (:${MAESTRO_LOCAL_PORT})" - echo " rebuild [--no-cache] [COMP...] Rebuild image(s) + restart + port-forward" - echo "" - echo " COMPONENTS: e.g. 'hyperfleet-adapter' (default: all)" - echo " Config: deploy-scripts/.env (copy from .env.example)" - exit 1 - ;; -esac diff --git a/deploy-scripts/lib/adapter.sh b/deploy-scripts/lib/adapter.sh deleted file mode 100755 index 6bd092d..0000000 --- a/deploy-scripts/lib/adapter.sh +++ /dev/null @@ -1,384 +0,0 @@ -#!/usr/bin/env bash - -# adapter.sh - HyperFleet Adapter component deployment functions -# -# This module handles discovery, installation, and uninstallation of adapters -# from the ${ADAPTERS_FILE_DIR} directory (defaults to ${TESTDATA_DIR}/adapter-configs) - -# ============================================================================ -# Adapter Discovery Functions -# ============================================================================ - -discover_adapters() { - # Use ADAPTERS_FILE_DIR env var, fallback to default - local adapter_configs_dir="${ADAPTERS_FILE_DIR:-${TESTDATA_DIR}/adapter-configs}" - - if [[ ! 
-d "${adapter_configs_dir}" ]]; then - log_verbose "Adapter configs directory not found: ${adapter_configs_dir}" >&2 - return 1 - fi - - # Read adapter names from environment variables - local cluster_adapters="${CLUSTER_TIER0_ADAPTERS_DEPLOYMENT:-}" - local nodepool_adapters="${NODEPOOL_TIER0_ADAPTERS_DEPLOYMENT:-}" - - if [[ -z "${cluster_adapters}" && -z "${nodepool_adapters}" ]]; then - log_error "No adapters specified. Set CLUSTER_TIER0_ADAPTERS_DEPLOYMENT and/or NODEPOOL_TIER0_ADAPTERS_DEPLOYMENT" >&2 - return 1 - fi - - # Build list of adapter directories from environment variables - local adapter_dirs=() - - # Add cluster adapters - if [[ -n "${cluster_adapters}" ]]; then - IFS=',' read -ra cluster_adapter_array <<<"${cluster_adapters}" - for adapter_name in "${cluster_adapter_array[@]}"; do - # Trim whitespace - adapter_name=$(echo "${adapter_name}" | xargs) - # Validate adapter name is not empty (prevents issues from trailing commas) - if [[ -z "${adapter_name}" ]]; then - log_error "Empty adapter name in CLUSTER_TIER0_ADAPTERS_DEPLOYMENT (check for trailing commas)" >&2 - return 1 - fi - if [[ -d "${adapter_configs_dir}/${adapter_name}" ]]; then - adapter_dirs+=("clusters|${adapter_name}") - else - log_error "Cluster adapter directory not found: ${adapter_configs_dir}/${adapter_name}" >&2 - return 1 - fi - done - fi - - # Add nodepool adapters - if [[ -n "${nodepool_adapters}" ]]; then - IFS=',' read -ra nodepool_adapter_array <<<"${nodepool_adapters}" - for adapter_name in "${nodepool_adapter_array[@]}"; do - # Trim whitespace - adapter_name=$(echo "${adapter_name}" | xargs) - # Validate adapter name is not empty (prevents issues from trailing commas) - if [[ -z "${adapter_name}" ]]; then - log_error "Empty adapter name in NODEPOOL_TIER0_ADAPTERS_DEPLOYMENT (check for trailing commas)" >&2 - return 1 - fi - if [[ -d "${adapter_configs_dir}/${adapter_name}" ]]; then - adapter_dirs+=("nodepools|${adapter_name}") - else - log_error "NodePool adapter 
directory not found: ${adapter_configs_dir}/${adapter_name}" >&2 - return 1 - fi - done - fi - - if [[ ${#adapter_dirs[@]} -eq 0 ]]; then - log_verbose "No adapter configurations found" >&2 - return 1 - fi - - log_info "Found ${#adapter_dirs[@]} adapter(s) to deploy:" >&2 - for dir in "${adapter_dirs[@]}"; do - log_info " - ${dir}" >&2 - done - - # Export for use in other functions - # Format: resource_type|adapter_name (e.g., "clusters|cl-namespace") - printf '%s\n' "${adapter_dirs[@]}" -} - -# ============================================================================ -# Adapter Installation Functions -# ============================================================================ - -install_adapter_instance() { - local dir_name="$1" - - log_section "Installing Adapter: ${dir_name}" - - # Extract resource_type and adapter_name from format: resource_type|adapter_name - local resource_type="${dir_name%%|*}" - local adapter_name="${dir_name##*|}" - - # Validate the descriptor format and ensure both parts are non-empty - if [[ -z "${resource_type}" || -z "${adapter_name}" || "${dir_name}" != *"|"* ]]; then - log_error "Invalid adapter descriptor '${dir_name}'. Expected format: resource_type|adapter_name" - return 1 - fi - - log_info "Resource type: ${resource_type}" - log_info "Adapter name: ${adapter_name}" - - # Temporary workaround for installation - HYPERFLEET-1097 - local release_name="${adapter_name}" - - log_info "Release name: ${release_name} (length: ${#release_name})" - - # Source adapter config directory (using ADAPTERS_FILE_DIR env var) - local adapter_configs_dir="${ADAPTERS_FILE_DIR:-${TESTDATA_DIR}/adapter-configs}" - local source_adapter_dir="${adapter_configs_dir}/${adapter_name}" - - if [[ ! 
-d "${source_adapter_dir}" ]]; then - log_error "Adapter config directory not found: ${source_adapter_dir}" - return 1 - fi - - # Chart path - local full_chart_path="${WORK_DIR}/adapter/${ADAPTER_CHART_PATH}" - - # Copy adapter config folder to chart directory - local dest_adapter_dir="${full_chart_path}/${adapter_name}" - log_info "Copying adapter config from ${source_adapter_dir} to ${dest_adapter_dir}" - - if [[ -d "${dest_adapter_dir}" ]]; then - # Safety check: ensure dest_adapter_dir contains adapter_name to prevent accidental deletion - if [[ "${dest_adapter_dir}" != *"${adapter_name}" || "${dest_adapter_dir}" == "/" || "${dest_adapter_dir}" == "${full_chart_path}" ]]; then - log_error "Safety check failed: refusing to delete suspicious path: ${dest_adapter_dir}" - return 1 - fi - log_verbose "Removing existing adapter config directory: ${dest_adapter_dir}" - rm -rf "${dest_adapter_dir}" - fi - - cp -r "${source_adapter_dir}" "${dest_adapter_dir}" - - # Patch imagePullPolicy in task resource YAMLs (what adapters create at runtime, e.g. Jobs/Deployments); - # helm --set image.pullPolicy only controls the adapter pod itself — these are separate concerns - if [[ "${IMAGE_PULL_POLICY}" != "Always" ]]; then - log_verbose "Patching imagePullPolicy to ${IMAGE_PULL_POLICY} in adapter resource manifests" - find "${dest_adapter_dir}" -name '*.yaml' -exec \ - sed -i.bak "s/imagePullPolicy: Always/imagePullPolicy: ${IMAGE_PULL_POLICY}/g" {} + - find "${dest_adapter_dir}" -name '*.bak' -delete 2>/dev/null - fi - - # Values file path (now in the chart directory) - local values_file="${dest_adapter_dir}/values.yaml" - if [[ ! 
-f "${values_file}" ]]; then - log_error "Values file not found: ${values_file}" - return 1 - fi - - # Construct subscription ID and topic names - # Allow override from environment variables, otherwise use auto-generated defaults - local subscription_id="${ADAPTER_SUBSCRIPTION_ID:-${NAMESPACE}-${resource_type}-${adapter_name}}" - local topic="${ADAPTER_TOPIC:-${NAMESPACE}-${resource_type}}" - local dead_letter_topic="${ADAPTER_DEAD_LETTER_TOPIC:-${NAMESPACE}-${resource_type}-dlq}" - - if [[ "${ADAPTER_BROKER_TYPE}" == "rabbitmq" && -z "${ADAPTER_BROKER_RABBITMQ_URL}" ]]; then - log_error "ADAPTER_BROKER_RABBITMQ_URL must be set when ADAPTER_BROKER_TYPE=rabbitmq" - return 1 - fi - - if [[ "${DRY_RUN}" == "true" ]]; then - log_info "[DRY-RUN] Would install adapter with:" - log_info " Release name: ${release_name}" - log_info " Namespace: ${NAMESPACE}" - log_info " Chart path: ${full_chart_path}" - log_info " Values file: ${values_file}" - log_info " Image: ${IMAGE_REGISTRY}/${ADAPTER_IMAGE_REPO}:${ADAPTER_IMAGE_TAG}" - log_info " Subscription ID: ${subscription_id}" - log_info " Topic: ${topic}" - log_info " Dead Letter Topic: ${dead_letter_topic}" - return 0 - fi - - # Build helm command with labels to track adapter metadata - local helm_cmd=( - helm upgrade --install - "${release_name}" - "${full_chart_path}" - --namespace "${NAMESPACE}" - --create-namespace - --wait - --timeout 5m - -f "${values_file}" - --set "fullnameOverride=${release_name}" - --set "image.registry=${IMAGE_REGISTRY}" - --set "image.repository=${ADAPTER_IMAGE_REPO}" - --set "image.tag=${ADAPTER_IMAGE_TAG}" - --set "image.pullPolicy=${IMAGE_PULL_POLICY}" - --set "broker.type=${ADAPTER_BROKER_TYPE}" - --set "broker.googlepubsub.projectId=${GCP_PROJECT_ID}" - --set "broker.googlepubsub.createTopicIfMissing=${ADAPTER_GOOGLEPUBSUB_CREATE_TOPIC_IF_MISSING}" - --set "broker.googlepubsub.createSubscriptionIfMissing=${ADAPTER_GOOGLEPUBSUB_CREATE_SUBSCRIPTION_IF_MISSING}" - --set 
"broker.googlepubsub.subscriptionId=${subscription_id}" - --set "broker.googlepubsub.topic=${topic}" - --set "broker.googlepubsub.deadLetterTopic=${dead_letter_topic}" - --labels "adapter-resource-type=${resource_type},adapter-name=${adapter_name}" - ) - - if [[ "${ADAPTER_BROKER_TYPE}" == "rabbitmq" && -n "${ADAPTER_BROKER_RABBITMQ_URL}" ]]; then - local rabbitmq_queue="${ADAPTER_RABBITMQ_QUEUE:-${subscription_id}}" - local rabbitmq_exchange="${ADAPTER_RABBITMQ_EXCHANGE:-${topic}}" - local rabbitmq_routing_key="${ADAPTER_RABBITMQ_ROUTING_KEY:-#}" - helm_cmd+=( - --set "broker.rabbitmq.url=${ADAPTER_BROKER_RABBITMQ_URL}" - --set "broker.rabbitmq.queue=${rabbitmq_queue}" - --set "broker.rabbitmq.exchange=${rabbitmq_exchange}" - --set "broker.rabbitmq.routingKey=${rabbitmq_routing_key}" - ) - fi - - log_info "Executing Helm command:" - log_info "${helm_cmd[*]}" - echo - - if "${helm_cmd[@]}"; then - log_success "Adapter ${adapter_name} for ${resource_type} Helm release created successfully" - - # Verify pod health - log_info "Verifying pod health..." 
- if verify_pod_health "${NAMESPACE}" "app.kubernetes.io/instance=${release_name}" "${adapter_name}" 120 5; then - log_success "Adapter ${adapter_name} for ${resource_type} is running and healthy" - else - log_error "Adapter ${adapter_name} for ${resource_type} deployment failed health check" - - # Capture debug logs before cleanup - local debug_log_dir="${DEBUG_LOG_DIR:-${WORK_DIR}/debug-logs}" - capture_debug_logs "${NAMESPACE}" "app.kubernetes.io/instance=${release_name}" "${release_name}" "${debug_log_dir}" - - # Cleanup failed deployment - log_warning "Cleaning up failed adapter deployment: ${release_name}" - if helm uninstall "${release_name}" -n "${NAMESPACE}" --wait --timeout 5m; then - log_info "Failed adapter deployment cleaned up successfully" - else - log_warning "Failed to cleanup adapter deployment, it may need manual cleanup" - fi - return 1 - fi - else - log_error "Failed to install adapter ${adapter_name} for ${resource_type}" - - # Check if release was created (partial deployment) and cleanup - if helm list -n "${NAMESPACE}" 2>/dev/null | grep -q "^${release_name}"; then - # Capture debug logs before cleanup - local debug_log_dir="${DEBUG_LOG_DIR:-${WORK_DIR}/debug-logs}" - capture_debug_logs "${NAMESPACE}" "app.kubernetes.io/instance=${release_name}" "${release_name}" "${debug_log_dir}" - - log_warning "Cleaning up failed adapter deployment: ${release_name}" - if helm uninstall "${release_name}" -n "${NAMESPACE}" --wait --timeout 5m; then - log_info "Failed adapter deployment cleaned up successfully" - else - log_warning "Failed to cleanup adapter deployment, it may need manual cleanup" - fi - fi - return 1 - fi -} - -install_adapters() { - log_section "Deploying All Adapters" - - # Discover adapters - local adapters - if ! adapters=$(discover_adapters); then - log_warning "No adapters found to deploy" - return 0 - fi - - # Install each adapter - local failed=0 - while IFS= read -r adapter_dir; do - if ! 
install_adapter_instance "${adapter_dir}"; then - log_warning "Failed to install adapter: ${adapter_dir}" - ((failed++)) - fi - done <<<"${adapters}" - - if [[ ${failed} -gt 0 ]]; then - log_error "${failed} adapter(s) failed to install" - return 1 - else - log_success "All adapters deployed successfully" - fi -} - -# ============================================================================ -# Adapter Uninstallation Functions -# ============================================================================ - -uninstall_adapter_instance() { - local dir_name="$1" - - log_section "Uninstalling Adapter: ${dir_name}" - - # Extract resource_type and adapter_name from format: resource_type|adapter_name - local resource_type="${dir_name%%|*}" - local adapter_name="${dir_name##*|}" - - # Validate the descriptor format and ensure both parts are non-empty - if [[ -z "${resource_type}" || -z "${adapter_name}" || "${dir_name}" != *"|"* ]]; then - log_error "Invalid adapter descriptor '${dir_name}'. Expected format: resource_type|adapter_name" - return 1 - fi - - log_info "Resource type: ${resource_type}" - log_info "Adapter name: ${adapter_name}" - - # Find all releases by searching for Helm labels (avoids pattern matching issues with truncated names) - log_info "Searching for releases with labels: adapter-resource-type=${resource_type}, adapter-name=${adapter_name}" - local matching_releases - matching_releases=$(helm list -n "${NAMESPACE}" --selector "adapter-resource-type=${resource_type},adapter-name=${adapter_name}" -q 2>/dev/null) - - if [[ -z "${matching_releases}" ]]; then - # Fallback: search by name prefix for releases created before labels were added - log_info "No releases found with labels. Trying fallback search by name prefix..." 
- local name_prefix="adapter-${resource_type}-${adapter_name}" - matching_releases=$(helm list -n "${NAMESPACE}" -q 2>/dev/null | grep "^${name_prefix}" || true) - - if [[ -z "${matching_releases}" ]]; then - log_warning "No releases found for adapter-resource-type=${resource_type}, adapter-name=${adapter_name} in namespace '${NAMESPACE}'" - return 0 - else - log_info "Found releases using name prefix fallback: ${matching_releases}" - fi - fi - - # Uninstall all matching releases - local uninstall_errors=0 - while IFS= read -r release_name; do - if [[ "${DRY_RUN}" == "true" ]]; then - log_info "[DRY-RUN] Would uninstall adapter (release: ${release_name})" - else - log_info "Uninstalling adapter ${adapter_name} for ${resource_type} (release: ${release_name})..." - log_info "Executing: helm uninstall ${release_name} -n ${NAMESPACE} --wait --timeout 5m" - - if helm uninstall "${release_name}" -n "${NAMESPACE}" --wait --timeout 5m; then - log_success "Adapter ${adapter_name} for ${resource_type} (release: ${release_name}) uninstalled successfully" - else - log_error "Failed to uninstall adapter ${adapter_name} for ${resource_type} (release: ${release_name})" - ((uninstall_errors++)) - fi - fi - done <<<"${matching_releases}" - - if [[ ${uninstall_errors} -gt 0 ]]; then - return 1 - fi - return 0 -} - -uninstall_adapters() { - log_section "Uninstalling All Adapters" - - # Discover adapters - local adapters - if ! adapters=$(discover_adapters); then - log_warning "No adapters found to uninstall" - return 0 - fi - - # Uninstall each adapter - local failed=0 - while IFS= read -r adapter_dir; do - if ! 
uninstall_adapter_instance "${adapter_dir}"; then - log_warning "Failed to uninstall adapter: ${adapter_dir}" - ((failed++)) - fi - done <<<"${adapters}" - - if [[ ${failed} -gt 0 ]]; then - log_error "${failed} adapter(s) failed to uninstall" - return 1 - else - log_success "All adapters uninstalled successfully" - fi -} diff --git a/deploy-scripts/lib/api.sh b/deploy-scripts/lib/api.sh deleted file mode 100755 index e57ea22..0000000 --- a/deploy-scripts/lib/api.sh +++ /dev/null @@ -1,146 +0,0 @@ -#!/usr/bin/env bash - -# api.sh - HyperFleet API component deployment functions -# -# This module handles installation and uninstallation of the HyperFleet API component - -# ============================================================================ -# API Component Functions -# ============================================================================ - -install_api() { - log_section "Installing API" - - local release_name="hyperfleet-api" - local full_chart_path="${WORK_DIR}/api/${API_CHART_PATH}" - - # Use API_ADAPTERS_* environment variables for API configuration - # These should be set dynamically based on specific test case requirements - local cluster_adapters="${API_ADAPTERS_CLUSTER:-}" - local nodepool_adapters="${API_ADAPTERS_NODEPOOL:-}" - - log_info "API Adapter Configuration:" - log_info " Cluster adapters: ${cluster_adapters:-}" - log_info " NodePool adapters: ${nodepool_adapters:-}" - - if [[ "${DRY_RUN}" == "true" ]]; then - log_info "[DRY-RUN] Would install API with:" - log_info " Release name: ${release_name}" - log_info " Namespace: ${NAMESPACE}" - log_info " Chart path: ${full_chart_path}" - log_info " Image: ${IMAGE_REGISTRY}/${API_IMAGE_REPO}:${API_IMAGE_TAG}" - log_info " Service type: ${API_SERVICE_TYPE}" - [[ -n "${cluster_adapters}" ]] && log_info " Cluster adapters: ${cluster_adapters}" - [[ -n "${nodepool_adapters}" ]] && log_info " Nodepool adapters: ${nodepool_adapters}" - return 0 - fi - - log_info "Installing API..." 
- log_verbose "Release name: ${release_name}" - log_verbose "Image: ${IMAGE_REGISTRY}/${API_IMAGE_REPO}:${API_IMAGE_TAG}" - - # Build helm command with image overrides - local helm_cmd=( - helm upgrade --install - "${release_name}" - "${full_chart_path}" - --namespace "${NAMESPACE}" - --create-namespace - --wait - --timeout 3m - --set "image.registry=${IMAGE_REGISTRY}" - --set "image.repository=${API_IMAGE_REPO}" - --set "image.tag=${API_IMAGE_TAG}" - --set "image.pullPolicy=${IMAGE_PULL_POLICY}" - --set "service.type=${API_SERVICE_TYPE}" - ) - - # Add adapter configurations (always set both, use empty if not discovered) - # The API chart requires both config.adapters.required.cluster and config.adapters.required.nodepool to be set - if [[ -n "${cluster_adapters}" ]]; then - helm_cmd+=(--set "config.adapters.required.cluster={${cluster_adapters}}") - log_verbose "Cluster adapters (API): ${cluster_adapters}" - else - helm_cmd+=(--set "config.adapters.required.cluster={}") - log_verbose "Cluster adapters (API): none" - fi - - if [[ -n "${nodepool_adapters}" ]]; then - helm_cmd+=(--set "config.adapters.required.nodepool={${nodepool_adapters}}") - log_verbose "Nodepool adapters (API): ${nodepool_adapters}" - else - helm_cmd+=(--set "config.adapters.required.nodepool={}") - log_verbose "Nodepool adapters (API): none" - fi - - log_info "Executing: ${helm_cmd[*]}" - - if "${helm_cmd[@]}"; then - log_success "API Helm release created successfully" - - # Verify pod health - log_info "Verifying pod health..." 
- if verify_pod_health "${NAMESPACE}" "app.kubernetes.io/instance=${release_name}" "API" 120 5; then - log_success "API is running and healthy" - else - log_error "API deployment failed health check" - - # Capture debug logs before cleanup - local debug_log_dir="${DEBUG_LOG_DIR:-${WORK_DIR}/debug-logs}" - capture_debug_logs "${NAMESPACE}" "app.kubernetes.io/instance=${release_name}" "${release_name}" "${debug_log_dir}" - - # Cleanup failed deployment - log_warning "Cleaning up failed API deployment: ${release_name}" - if helm uninstall "${release_name}" -n "${NAMESPACE}" --wait --timeout 5m; then - log_info "Failed API deployment cleaned up successfully" - else - log_warning "Failed to cleanup API deployment, it may need manual cleanup" - fi - return 1 - fi - else - log_error "Failed to install API" - - # Check if release was created (partial deployment) and cleanup - if helm list -n "${NAMESPACE}" 2>/dev/null | grep -q "^${release_name}"; then - # Capture debug logs before cleanup - local debug_log_dir="${DEBUG_LOG_DIR:-${WORK_DIR}/debug-logs}" - capture_debug_logs "${NAMESPACE}" "app.kubernetes.io/instance=${release_name}" "${release_name}" "${debug_log_dir}" - - log_warning "Cleaning up failed API deployment: ${release_name}" - if helm uninstall "${release_name}" -n "${NAMESPACE}" --wait --timeout 5m; then - log_info "Failed API deployment cleaned up successfully" - else - log_warning "Failed to cleanup API deployment, it may need manual cleanup" - fi - fi - return 1 - fi -} - -uninstall_api() { - log_section "Uninstalling API" - - local release_name="hyperfleet-api" - - # Check if release exists - if [[ -z "$(helm list -n "${NAMESPACE}" -q -f "^${release_name}$")" ]]; then - log_warning "Release '${release_name}' not found in namespace '${NAMESPACE}'" - return 0 - fi - - if [[ "${DRY_RUN}" == "true" ]]; then - log_info "[DRY-RUN] Would uninstall API (release: ${release_name})" - return 0 - fi - - log_info "Uninstalling API..." 
- log_info "Executing: helm uninstall ${release_name} -n ${NAMESPACE} --wait --timeout 5m" - - if helm uninstall "${release_name}" -n "${NAMESPACE}" --wait --timeout 5m; then - log_success "API uninstalled successfully" - else - log_error "Failed to uninstall API" - return 1 - fi -} diff --git a/deploy-scripts/lib/common.sh b/deploy-scripts/lib/common.sh deleted file mode 100755 index 7890618..0000000 --- a/deploy-scripts/lib/common.sh +++ /dev/null @@ -1,305 +0,0 @@ -#!/usr/bin/env bash - -# common.sh - Common utilities for CLM deployment scripts -# -# This module provides shared functionality used across all deployment scripts: -# - Logging functions -# - Dependency checking -# - Kubernetes context validation - -# ============================================================================ -# Logging Functions -# ============================================================================ - -log_info() { - echo "[INFO] $*" -} - -log_success() { - echo "[SUCCESS] $*" -} - -log_warning() { - echo "[WARNING] $*" -} - -log_error() { - echo "[ERROR] $*" >&2 -} - -log_verbose() { - if [[ "${VERBOSE}" == "true" ]]; then - echo "[VERBOSE] $*" - fi -} - -log_section() { - echo - echo "===================================================================" - echo "$*" - echo "===================================================================" -} - -# ============================================================================ -# Dependency Checking -# ============================================================================ - -check_dependencies() { - log_section "Checking Dependencies" - - local missing_deps=() - - local deps=("kubectl" "helm" "git") - for dep in "${deps[@]}"; do - if ! 
command -v "${dep}" &> /dev/null; then - missing_deps+=("${dep}") - log_error "Required dependency '${dep}' not found" - else - local version - case "${dep}" in - kubectl) - version=$(kubectl version --client --short 2>/dev/null | head -n1 || echo "unknown") - ;; - helm) - version=$(helm version --short 2>/dev/null || echo "unknown") - ;; - git) - version=$(git --version || echo "unknown") - ;; - esac - log_verbose "Found ${dep}: ${version}" - fi - done - - if [[ ${#missing_deps[@]} -gt 0 ]]; then - log_error "Missing required dependencies: ${missing_deps[*]}" - log_error "Please install the missing dependencies and try again" - return 1 - fi - - log_success "All dependencies are available" - return 0 -} - -# ============================================================================ -# Kubernetes Context Validation -# ============================================================================ - -validate_kubectl_context() { - log_section "Validating Kubernetes Context" - - if ! kubectl cluster-info &> /dev/null; then - log_error "Unable to connect to Kubernetes cluster" - log_error "Please ensure your kubeconfig is properly configured" - return 1 - fi - - local context - context=$(kubectl config current-context) - log_info "Current kubectl context: ${context}" - - local cluster_info - cluster_info=$(kubectl cluster-info 2>&1 | head -n1 || echo "unknown") - log_verbose "Cluster info: ${cluster_info}" - - log_success "Kubectl context validated" - return 0 -} - -# ============================================================================ -# Pod Health Verification -# ============================================================================ - -verify_pod_health() { - local namespace="$1" - local selector="$2" - local component_name="${3:-component}" - local timeout="${4:-60}" - local interval="${5:-5}" - - log_info "Verifying pod health for ${component_name}..." 
- log_verbose "Namespace: ${namespace}, Selector: ${selector}" - - local elapsed=0 - while [[ ${elapsed} -lt ${timeout} ]]; do - # Get pod status - local pod_status - pod_status=$(kubectl get pods -n "${namespace}" -l "${selector}" \ - -o jsonpath='{range .items[*]}{.metadata.name}{"\t"}{.status.phase}{"\t"}{range .status.containerStatuses[*]}{.state.waiting.reason}{" "}{.state.terminated.reason}{end}{"\n"}{end}' 2>/dev/null) - - if [[ -z "${pod_status}" ]]; then - log_warning "No pods found with selector ${selector} in namespace ${namespace}" - sleep ${interval} - ((elapsed += interval)) - continue - fi - - # Check for failure states - local has_failures=false - local failure_details="" - - while IFS=$'\t' read -r pod_name phase reasons; do - log_verbose "Pod ${pod_name}: phase=${phase}, reasons=${reasons}" - - # Check for problematic states - if [[ "${phase}" == "Failed" ]] || \ - [[ "${reasons}" == *"CrashLoopBackOff"* ]] || \ - [[ "${reasons}" == *"Error"* ]] || \ - [[ "${reasons}" == *"ImagePullBackOff"* ]] || \ - [[ "${reasons}" == *"ErrImagePull"* ]]; then - has_failures=true - failure_details="${failure_details}\n - ${pod_name}: ${phase} (${reasons})" - fi - done <<< "${pod_status}" - - if [[ "${has_failures}" == "true" ]]; then - log_error "Pod health check failed for ${component_name}:" - echo -e "${failure_details}" - log_info "Pod details:" - kubectl get pods -n "${namespace}" -l "${selector}" - return 1 - fi - - # Check if all pods are running - local running_count - running_count=$(kubectl get pods -n "${namespace}" -l "${selector}" \ - -o jsonpath='{range .items[*]}{.status.phase}{"\n"}{end}' 2>/dev/null | grep -c "^Running$" || echo "0") - - local total_count - total_count=$(kubectl get pods -n "${namespace}" -l "${selector}" --no-headers 2>/dev/null | wc -l | tr -d ' ') - - if [[ ${running_count} -gt 0 ]] && [[ ${running_count} -eq ${total_count} ]]; then - log_success "All pods for ${component_name} are running (${running_count}/${total_count})" - 
return 0 - fi - - log_verbose "Waiting for pods to be ready: ${running_count}/${total_count} running (${elapsed}s/${timeout}s)" - sleep ${interval} - ((elapsed += interval)) - done - - log_error "Timeout waiting for ${component_name} pods to become healthy" - log_info "Current pod status:" - kubectl get pods -n "${namespace}" -l "${selector}" - return 1 -} - -# ============================================================================ -# Debug Log Capture -# ============================================================================ - -capture_debug_logs() { - local namespace="$1" - local selector="$2" - local component_name="$3" - local output_dir="${4:-${WORK_DIR:-${PWD}}/debug-logs}" - local capture_failed=false - - log_section "Capturing Debug Logs for ${component_name}" - - # Create output directory - if ! mkdir -p "${output_dir}"; then - log_error "Failed to create debug log directory: ${output_dir}" - return 1 - fi - - local timestamp - timestamp=$(date +"%Y%m%d-%H%M%S") - local log_prefix="${output_dir}/${component_name}-${timestamp}-$$-${RANDOM}" - - log_info "Saving debug logs to: ${log_prefix}-*" - - # Capture pod logs - log_info "Capturing pod logs..." - kubectl logs -n "${namespace}" -l "${selector}" --all-containers=true --prefix=true > "${log_prefix}-pods.log" 2>&1 || { log_warning "Failed to capture current pod logs"; capture_failed=true; } - - # Capture previous pod logs (for crashed containers) - log_info "Capturing previous pod logs..." - kubectl logs -n "${namespace}" -l "${selector}" --all-containers=true --prefix=true --previous > "${log_prefix}-pods-previous.log" 2>&1 || true - - # Capture pod descriptions - log_info "Capturing pod descriptions..." - kubectl describe pods -n "${namespace}" -l "${selector}" > "${log_prefix}-pods-describe.txt" 2>&1 || { log_warning "Failed to capture pod descriptions"; capture_failed=true; } - - # Capture pod status - log_info "Capturing pod status..." 
- kubectl get pods -n "${namespace}" -l "${selector}" -o wide > "${log_prefix}-pods-status.txt" 2>&1 || { log_warning "Failed to capture pod status"; capture_failed=true; } - kubectl get pods -n "${namespace}" -l "${selector}" -o yaml > "${log_prefix}-pods-yaml.yaml" 2>&1 || { log_warning "Failed to capture pod YAML"; capture_failed=true; } - - # Capture events - log_info "Capturing namespace events..." - kubectl get events -n "${namespace}" --sort-by='.lastTimestamp' > "${log_prefix}-events.txt" 2>&1 || { log_warning "Failed to capture namespace events"; capture_failed=true; } - - # Capture deployment/statefulset status if exists - log_info "Capturing deployment/statefulset status..." - kubectl get deployments,statefulsets -n "${namespace}" -l "${selector}" -o wide > "${log_prefix}-workloads-status.txt" 2>&1 || { log_warning "Failed to capture workload status"; capture_failed=true; } - kubectl get deployments,statefulsets -n "${namespace}" -l "${selector}" -o yaml > "${log_prefix}-workloads-yaml.yaml" 2>&1 || { log_warning "Failed to capture workload YAML"; capture_failed=true; } - - # Capture services and endpoints - log_info "Capturing services and endpoints..." 
- kubectl get svc,endpoints -n "${namespace}" -l "${selector}" -o wide > "${log_prefix}-network.txt" 2>&1 || { log_warning "Failed to capture services and endpoints"; capture_failed=true; } - - # Create a summary file - cat > "${log_prefix}-summary.txt" < /dev/null; then - log_warning "Namespace '${namespace}' does not exist" - return 0 - fi - - if [[ "${DRY_RUN}" == "true" ]]; then - log_info "[DRY-RUN] Would delete namespace: ${namespace}" - return 0 - fi - - log_info "Deleting namespace: ${namespace}" - log_warning "This will remove all resources in the namespace" - - if kubectl delete namespace "${namespace}" --wait --timeout=5m; then - log_success "Namespace '${namespace}' deleted successfully" - return 0 - else - log_error "Failed to delete namespace '${namespace}'" - log_info "You may need to manually remove finalizers or check for stuck resources" - return 1 - fi -} diff --git a/deploy-scripts/lib/gcp.sh b/deploy-scripts/lib/gcp.sh deleted file mode 100644 index 41b2be9..0000000 --- a/deploy-scripts/lib/gcp.sh +++ /dev/null @@ -1,320 +0,0 @@ -#!/usr/bin/env bash - -# gcp.sh - Google Cloud Platform resource management functions -# -# This module handles discovery and cleanup of GCP resources (Pub/Sub topics and subscriptions) -# created during deployment. 
-# -# NAMESPACE requirements -# - Must be unique to prevent Pub/Sub topic/subscription collisions across deployments -# - Must be DNS-1123 compliant (lowercase alphanumeric, hyphens, start/end with alphanumeric) -# - Default: hyperfleet-e2e-$USER (when using .env configuration) - -# ============================================================================ -# Constants -# ============================================================================ - -# Resource types managed by the system -readonly RESOURCE_TYPES=("clusters" "nodepools") - -# ============================================================================ -# GCP Dependency Checking -# ============================================================================ - -check_gcp_dependencies() { - log_verbose "Checking GCP CLI dependencies" - - if ! command -v gcloud &> /dev/null; then - log_error "gcloud CLI not found" - log_error "Please install Google Cloud SDK: https://cloud.google.com/sdk/docs/install" - return 1 - fi - - local gcloud_version - gcloud_version=$(gcloud --version 2>/dev/null | head -n1 || echo "unknown") - log_verbose "Found gcloud: ${gcloud_version}" - - return 0 -} - -# ============================================================================ -# GCP Pub/Sub Discovery Functions -# ============================================================================ - -discover_pubsub_topics() { - local namespace="$1" - local project_id="${GCP_PROJECT_ID}" - - log_verbose "Discovering Pub/Sub topics for namespace: ${namespace}" - - if [[ -z "${project_id}" ]]; then - log_error "GCP_PROJECT_ID is not set" - return 1 - fi - - # List topics that match the namespace pattern - # NAMESPACE must be unique and DNS-1123 compliant (default: hyperfleet-e2e-$USER when using .env) - # Topics are named: - # - ${NAMESPACE}-${resource_type} (e.g., hyperfleet-e2e-jdoe-clusters, hyperfleet-e2e-jdoe-nodepools) - # - ${NAMESPACE}-${resource_type}-dlq (e.g., hyperfleet-e2e-jdoe-clusters-dlq) - # - 
${NAMESPACE}-${resource_type}-${adapter_name}-dlq (e.g., hyperfleet-e2e-jdoe-clusters-adapter1-dlq) - local topics=() - local all_topics - - if ! all_topics=$(gcloud pubsub topics list --project="${project_id}" --format="value(name)" 2>/dev/null); then - log_error "Failed to list Pub/Sub topics in project ${project_id}" - log_error "Make sure you have authenticated with: gcloud auth login" - return 1 - fi - - while IFS= read -r topic; do - if [[ -z "${topic}" ]]; then - continue - fi - - # Extract topic name from full path (projects/{project}/topics/{topic-name}) - local topic_name="${topic##*/}" - - # Match topics with all naming patterns: - # 1. Main topics: ${namespace}-${resource_type} - # 2. DLQ topics (intended): ${namespace}-${resource_type}-dlq - # 3. DLQ topics (temporary/Helm bug): ${namespace}-${resource_type}-${adapter_name}-dlq - local matched=false - for resource_type in "${RESOURCE_TYPES[@]}"; do - if [[ "${topic_name}" == "${namespace}-${resource_type}" ]] || \ - [[ "${topic_name}" == "${namespace}-${resource_type}-dlq" ]] || \ - [[ "${topic_name}" =~ ^${namespace}-${resource_type}-.+-dlq$ ]]; then - matched=true - break - fi - done - - if [[ "${matched}" == "true" ]]; then - topics+=("${topic_name}") - fi - done <<< "${all_topics}" - - if [[ ${#topics[@]} -eq 0 ]]; then - log_verbose "No Pub/Sub topics found for namespace: ${namespace}" >&2 - return 1 - fi - - log_info "Found ${#topics[@]} Pub/Sub topic(s) for namespace ${namespace}:" >&2 - for topic in "${topics[@]}"; do - log_info " - ${topic}" >&2 - done - - # Export for use in other functions (stdout only) - printf '%s\n' "${topics[@]}" -} - -discover_pubsub_subscriptions() { - local namespace="$1" - local project_id="${GCP_PROJECT_ID}" - - log_verbose "Discovering Pub/Sub subscriptions for namespace: ${namespace}" - - if [[ -z "${project_id}" ]]; then - log_error "GCP_PROJECT_ID is not set" - return 1 - fi - - # List subscriptions that match the namespace pattern - # NAMESPACE must be unique 
and DNS-1123 compliant (default: hyperfleet-e2e-$USER when using .env) - # Subscriptions are named: ${NAMESPACE}-${resource_type}-${adapter_name} - # Example: hyperfleet-e2e-jdoe-clusters-adapter1, -clusters-adapter1 - local subscriptions=() - local all_subscriptions - - if ! all_subscriptions=$(gcloud pubsub subscriptions list --project="${project_id}" --format="value(name)" 2>/dev/null); then - log_error "Failed to list Pub/Sub subscriptions in project ${project_id}" - log_error "Make sure you have authenticated with: gcloud auth login" - return 1 - fi - - while IFS= read -r subscription; do - if [[ -z "${subscription}" ]]; then - continue - fi - - # Extract subscription name from full path (projects/{project}/subscriptions/{subscription-name}) - local subscription_name="${subscription##*/}" - - # Match subscriptions with the expected naming pattern: - # ${namespace}-${resource_type}-${adapter_name} - local matched=false - for resource_type in "${RESOURCE_TYPES[@]}"; do - if [[ "${subscription_name}" =~ ^${namespace}-${resource_type}-.+ ]]; then - matched=true - break - fi - done - - if [[ "${matched}" == "true" ]]; then - subscriptions+=("${subscription_name}") - fi - done <<< "${all_subscriptions}" - - if [[ ${#subscriptions[@]} -eq 0 ]]; then - log_verbose "No Pub/Sub subscriptions found for namespace: ${namespace}" >&2 - return 1 - fi - - log_info "Found ${#subscriptions[@]} Pub/Sub subscription(s) for namespace ${namespace}:" >&2 - for subscription in "${subscriptions[@]}"; do - log_info " - ${subscription}" >&2 - done - - # Export for use in other functions (stdout only) - printf '%s\n' "${subscriptions[@]}" -} - -# ============================================================================ -# GCP Pub/Sub Deletion Functions -# ============================================================================ - -delete_pubsub_subscription() { - local subscription_name="$1" - local project_id="${GCP_PROJECT_ID}" - - if [[ "${DRY_RUN}" == "true" ]]; then - log_info 
"[DRY-RUN] Would delete subscription: ${subscription_name}" - return 0 - fi - - log_info "Deleting subscription: ${subscription_name}" - - if gcloud pubsub subscriptions delete "${subscription_name}" \ - --project="${project_id}" \ - --quiet 2>/dev/null; then - log_success "Deleted subscription: ${subscription_name}" - return 0 - else - log_error "Failed to delete subscription: ${subscription_name}" - return 1 - fi -} - -delete_pubsub_topic() { - local topic_name="$1" - local project_id="${GCP_PROJECT_ID}" - - if [[ "${DRY_RUN}" == "true" ]]; then - log_info "[DRY-RUN] Would delete topic: ${topic_name}" - return 0 - fi - - log_info "Deleting topic: ${topic_name}" - - if gcloud pubsub topics delete "${topic_name}" \ - --project="${project_id}" \ - --quiet 2>/dev/null; then - log_success "Deleted topic: ${topic_name}" - return 0 - else - log_error "Failed to delete topic: ${topic_name}" - return 1 - fi -} - -delete_all_pubsub_subscriptions() { - local namespace="$1" - - log_section "Deleting Pub/Sub Subscriptions" - - # Discover subscriptions (stdout only contains resource names, stderr has logs) - local subscriptions - if ! subscriptions=$(discover_pubsub_subscriptions "${namespace}"); then - log_info "No Pub/Sub subscriptions to delete" - return 0 - fi - - # Delete each subscription - local failed=0 - while IFS= read -r subscription; do - if [[ -n "${subscription}" ]]; then - if ! delete_pubsub_subscription "${subscription}"; then - log_error "Failed to delete subscription: ${subscription}" - ((failed++)) - fi - fi - done <<< "${subscriptions}" - - if [[ ${failed} -gt 0 ]]; then - log_error "${failed} subscription(s) failed to delete" - return 1 - else - log_success "All subscriptions deleted successfully" - return 0 - fi -} - -delete_all_pubsub_topics() { - local namespace="$1" - - log_section "Deleting Pub/Sub Topics" - - # Discover topics (stdout only contains resource names, stderr has logs) - local topics - if ! 
topics=$(discover_pubsub_topics "${namespace}"); then - log_info "No Pub/Sub topics to delete" - return 0 - fi - - # Delete each topic - local failed=0 - while IFS= read -r topic; do - if [[ -n "${topic}" ]]; then - if ! delete_pubsub_topic "${topic}"; then - log_error "Failed to delete topic: ${topic}" - ((failed++)) - fi - fi - done <<< "${topics}" - - if [[ ${failed} -gt 0 ]]; then - log_error "${failed} topic(s) failed to delete" - return 1 - else - log_success "All topics deleted successfully" - return 0 - fi -} - -# ============================================================================ -# Main GCP Cleanup Function -# ============================================================================ - -cleanup_gcp_resources() { - local namespace="$1" - - log_section "Cleaning Up GCP Resources" - - # Check GCP CLI dependencies - if ! check_gcp_dependencies; then - log_error "GCP CLI dependencies not available" - return 1 - fi - - local cleanup_errors=0 - - # Delete subscriptions first (subscriptions depend on topics) - if ! delete_all_pubsub_subscriptions "${namespace}"; then - log_warning "Some subscriptions failed to delete" - ((cleanup_errors++)) - fi - - # Delete topics - if ! 
delete_all_pubsub_topics "${namespace}"; then - log_warning "Some topics failed to delete" - ((cleanup_errors++)) - fi - - if [[ ${cleanup_errors} -gt 0 ]]; then - log_warning "GCP resource cleanup completed with errors" - return 1 - else - log_success "GCP resource cleanup complete" - return 0 - fi -} diff --git a/deploy-scripts/lib/helm.sh b/deploy-scripts/lib/helm.sh deleted file mode 100755 index a835f49..0000000 --- a/deploy-scripts/lib/helm.sh +++ /dev/null @@ -1,69 +0,0 @@ -#!/usr/bin/env bash - -# helm.sh - Helm chart management functions -# -# This module provides functions for cloning and managing Helm charts - -# ============================================================================ -# Helm Chart Management -# ============================================================================ - -clone_helm_chart() { - local component="$1" - local repo_url="$2" - local ref="$3" - local chart_path="$4" - - log_info "Cloning ${component} Helm chart from ${repo_url}@${ref} (sparse: ${chart_path})" - - local component_dir="${WORK_DIR}/${component}" - - if [[ -z "${WORK_DIR}" || "${WORK_DIR}" == "/" ]]; then - log_error "WORK_DIR must be set to a non-root directory" - return 1 - fi - if [[ -z "${component}" ]]; then - log_error "Component name is required" - return 1 - fi - - # Clean up any existing directory to ensure fresh clone - if [[ -d "${component_dir}" ]]; then - log_verbose "Removing existing directory: ${component_dir}" - rm -rf "${component_dir}" - fi - - if [[ "${DRY_RUN}" == "true" ]]; then - log_info "[DRY-RUN] Would clone (sparse): git clone --depth 1 --filter=blob:none --sparse --branch ${ref} ${repo_url}" - log_info "[DRY-RUN] Would checkout: ${chart_path}" - return 0 - fi - - # Clone with sparse checkout - only download the chart directory - log_verbose "Executing sparse checkout: git clone --depth 1 --filter=blob:none --sparse --no-checkout --branch ${ref} ${repo_url} ${component_dir}" - if ! 
git clone --depth 1 --filter=blob:none --sparse --no-checkout --branch "${ref}" "${repo_url}" "${component_dir}" >/dev/null 2>&1; then - log_error "Failed to clone ${component} Helm chart" - return 1 - fi - - # Configure sparse checkout to only include the chart path (no cone mode to avoid root files) - log_verbose "Configuring sparse checkout for: ${chart_path}" - if ! (cd "${component_dir}" && \ - git sparse-checkout init --no-cone >/dev/null 2>&1 && \ - git sparse-checkout set "${chart_path}" >/dev/null 2>&1 && \ - git checkout "${ref}" >/dev/null 2>&1); then - log_error "Failed to checkout chart path: ${chart_path}" - return 1 - fi - - # Verify chart path exists - local full_chart_path="${component_dir}/${chart_path}" - if [[ ! -f "${full_chart_path}/Chart.yaml" ]]; then - log_error "Chart.yaml not found at ${full_chart_path}" - log_error "Please verify the chart path is correct" - return 1 - fi - - log_success "Cloned ${component} Helm chart" - log_verbose "Chart location: ${full_chart_path}" -} diff --git a/deploy-scripts/lib/sentinel.sh b/deploy-scripts/lib/sentinel.sh deleted file mode 100755 index 2033f33..0000000 --- a/deploy-scripts/lib/sentinel.sh +++ /dev/null @@ -1,177 +0,0 @@ -#!/usr/bin/env bash - -# sentinel.sh - HyperFleet Sentinel component deployment functions -# -# This module handles installation and uninstallation of HyperFleet Sentinel instances -# for both clusters and nodepools resource types - -# ============================================================================ -# Sentinel Component Functions -# ============================================================================ - -install_sentinel_instance() { - local resource_type="$1" # "clusters" or "nodepools" - - local component_name="Sentinel (${resource_type})" - local release_name="sentinel-${resource_type}" - local full_chart_path="${WORK_DIR}/sentinel/${SENTINEL_CHART_PATH}" - - log_section "Installing ${component_name}" - - # Determine API base URL - local 
api_url="${API_BASE_URL}" - - if [[ "${SENTINEL_BROKER_TYPE}" == "rabbitmq" && -z "${SENTINEL_BROKER_RABBITMQ_URL}" ]]; then - log_error "SENTINEL_BROKER_RABBITMQ_URL must be set when SENTINEL_BROKER_TYPE=rabbitmq" - return 1 - fi - - if [[ "${DRY_RUN}" == "true" ]]; then - log_info "[DRY-RUN] Would install ${component_name} with:" - log_info " Release name: ${release_name}" - log_info " Namespace: ${NAMESPACE}" - log_info " Chart path: ${full_chart_path}" - log_info " Image: ${IMAGE_REGISTRY}/${SENTINEL_IMAGE_REPO}:${SENTINEL_IMAGE_TAG}" - log_info " API base URL: ${api_url} (config.clients.hyperfleetApi.baseUrl)" - log_info " Broker type: ${SENTINEL_BROKER_TYPE}" - log_info " Resource type: ${resource_type}" - log_info " Google Pub/Sub Project ID: ${GCP_PROJECT_ID}" - log_info " Google Pub/Sub Create Topic If Missing: ${SENTINEL_GOOGLEPUBSUB_CREATE_TOPIC_IF_MISSING}" - return 0 - fi - - log_info "Installing ${component_name}..." - log_verbose "Release name: ${release_name}" - log_verbose "Image: ${IMAGE_REGISTRY}/${SENTINEL_IMAGE_REPO}:${SENTINEL_IMAGE_TAG}" - log_verbose "API base URL: ${api_url}" - log_verbose "Resource type: ${resource_type}" - - # Build helm command - local helm_cmd=( - helm upgrade --install - "${release_name}" - "${full_chart_path}" - --namespace "${NAMESPACE}" - --create-namespace - --wait - --timeout 3m - --set "image.registry=${IMAGE_REGISTRY}" - --set "image.repository=${SENTINEL_IMAGE_REPO}" - --set "image.tag=${SENTINEL_IMAGE_TAG}" - --set "image.pullPolicy=${IMAGE_PULL_POLICY}" - --set "config.clients.hyperfleetApi.baseUrl=${api_url}" - --set "config.resourceType=${resource_type}" - --set "broker.type=${SENTINEL_BROKER_TYPE}" - --set "broker.googlepubsub.projectId=${GCP_PROJECT_ID}" - --set "broker.googlepubsub.createTopicIfMissing=${SENTINEL_GOOGLEPUBSUB_CREATE_TOPIC_IF_MISSING}" - ) - - if [[ "${SENTINEL_BROKER_TYPE}" == "rabbitmq" && -n "${SENTINEL_BROKER_RABBITMQ_URL}" ]]; then - helm_cmd+=(--set 
"broker.rabbitmq.url=${SENTINEL_BROKER_RABBITMQ_URL}") - fi - - # Add message_data.owner_references configuration for nodepools resource type - # This enables the sentinel to include ownerReferences from the Kubernetes resource - # in the message data sent to adapters, which is required for nodepools management - if [[ "${resource_type}" == "nodepools" ]]; then - helm_cmd+=( - --set "config.messageData.owner_references.id=resource.owner_references.id" - --set "config.messageData.owner_references.href=resource.owner_references.href" - --set "config.messageData.owner_references.kind=resource.owner_references.kind" - ) - fi - - log_info "Executing: ${helm_cmd[*]}" - - if "${helm_cmd[@]}"; then - log_success "${component_name} Helm release created successfully" - - # Verify pod health - log_info "Verifying pod health..." - if verify_pod_health "${NAMESPACE}" "app.kubernetes.io/instance=${release_name}" "${component_name}" 120 5; then - log_success "${component_name} is running and healthy" - else - log_error "${component_name} deployment failed health check" - - # Capture debug logs before cleanup - local debug_log_dir="${DEBUG_LOG_DIR:-${WORK_DIR}/debug-logs}" - capture_debug_logs "${NAMESPACE}" "app.kubernetes.io/instance=${release_name}" "${release_name}" "${debug_log_dir}" - - # Cleanup failed deployment - log_warning "Cleaning up failed ${component_name} deployment: ${release_name}" - if helm uninstall "${release_name}" -n "${NAMESPACE}" --wait --timeout 5m; then - log_info "Failed ${component_name} deployment cleaned up successfully" - else - log_warning "Failed to cleanup ${component_name} deployment, it may need manual cleanup" - fi - return 1 - fi - else - log_error "Failed to install ${component_name}" - - # Check if release was created (partial deployment) and cleanup - if helm list -n "${NAMESPACE}" 2>/dev/null | grep -q "^${release_name}"; then - # Capture debug logs before cleanup - local debug_log_dir="${DEBUG_LOG_DIR:-${WORK_DIR}/debug-logs}" - 
capture_debug_logs "${NAMESPACE}" "app.kubernetes.io/instance=${release_name}" "${release_name}" "${debug_log_dir}" - - log_warning "Cleaning up failed ${component_name} deployment: ${release_name}" - if helm uninstall "${release_name}" -n "${NAMESPACE}" --wait --timeout 5m; then - log_info "Failed ${component_name} deployment cleaned up successfully" - else - log_warning "Failed to cleanup ${component_name} deployment, it may need manual cleanup" - fi - fi - return 1 - fi -} - -install_sentinel() { - - install_sentinel_instance "clusters" || return 1 - install_sentinel_instance "nodepools" || return 1 -} - -uninstall_sentinel_instance() { - local resource_type="$1" # "clusters" or "nodepools" - - # Capitalize first letter for display - local resource_type_display - if [[ "${resource_type}" == "clusters" ]]; then - resource_type_display="Clusters" - else - resource_type_display="Nodepools" - fi - - local component_name="Sentinel (${resource_type_display})" - local release_name="sentinel-${resource_type}" - - log_section "Uninstalling ${component_name}" - - # Check if release exists - if ! helm list -n "${NAMESPACE}" | grep -q "^${release_name}"; then - log_warning "Release '${release_name}' not found in namespace '${NAMESPACE}'" - return 0 - fi - - if [[ "${DRY_RUN}" == "true" ]]; then - log_info "[DRY-RUN] Would uninstall ${component_name} (release: ${release_name})" - return 0 - fi - - log_info "Uninstalling ${component_name}..." 
- log_info "Executing: helm uninstall ${release_name} -n ${NAMESPACE} --wait --timeout 5m" - - if helm uninstall "${release_name}" -n "${NAMESPACE}" --wait --timeout 5m; then - log_success "${component_name} uninstalled successfully" - else - log_error "Failed to uninstall ${component_name}" - return 1 - fi -} - -uninstall_sentinel() { - # Uninstall in reverse order - uninstall_sentinel_instance "nodepools" || log_warning "Failed to uninstall Sentinel (Nodepools)" - uninstall_sentinel_instance "clusters" || log_warning "Failed to uninstall Sentinel (Clusters)" -} diff --git a/docs/development.md b/docs/development.md index 79b77c6..e091908 100644 --- a/docs/development.md +++ b/docs/development.md @@ -338,20 +338,6 @@ import ( No need to manually register tests. -### 4. Run Your Test - -```bash -# Run all cluster tests -make build -./bin/hyperfleet-e2e test --focus "\[Suite: cluster\]" - -# Run specific test by description -./bin/hyperfleet-e2e test --focus "Create Cluster via API" - -# Or run by label -./bin/hyperfleet-e2e test --label-filter "critical && lifecycle" -``` - ## Common Patterns ### Create Resource from Payload @@ -389,43 +375,65 @@ for _, adapter := range statuses.Items { } ``` -### Running a development environment with custom dev images and RabbitMQ +## Validating New E2E Tests -> **For a complete local setup guide using kind**, see [Local kind Setup](local-kind-setup.md). +After writing your test, validate it works properly: -While in development, it is common to use custom images for components (api, sentinel, adapters) instead of the CI images. +### 1. Set Up Your Development Environment -It is also convenient to use RabbitMQ to avoid dealing with GCP credentials for Pub/Sub. +You need a running HyperFleet environment before running tests. 
See the [Setup Guide](setup.md) for complete instructions: -RabbitMQ has to be installed beforehand, you can use the `hyperfleet-infra` repository to execute: +- **Kind (local):** Fast setup for local testing (recommended for development) +- **GCP:** Cloud environment for more realistic testing +The environment setup will configure required environment variables: +- `HYPERFLEET_API_URL` +- `MAESTRO_URL` +- `NAMESPACE` +- source `env/env.local` if required + +### 2. Build the E2E Binary + +```bash +# Build the binary +make build ``` -make install-rabbitmq NAMESPACE=rabbitmq -``` -Then you can deploy the e2e test components with support for RabbitMQ and custom images executing: +### 3. Run Your Test + +```bash +# Run your specific test by description +./bin/hyperfleet-e2e test --focus "Your Test Description" +# Or run by suite +./bin/hyperfleet-e2e test --focus "\[Suite: Your new test suite\]" ``` -SENTINEL_BROKER_RABBITMQ_URL="amqp://guest:guest@rabbitmq.rabbitmq:5672" \ -ADAPTER_BROKER_RABBITMQ_URL="amqp://guest:guest@rabbitmq.rabbitmq:5672" \ -ADAPTER_BROKER_TYPE=rabbitmq \ -SENTINEL_BROKER_TYPE=rabbitmq \ -./deploy-scripts/deploy-clm.sh --action install \ ---namespace \ ---image-registry quay.io/ \ ---api-image-repo hyperfleet-api \ ---api-image-tag \ ---sentinel-image-repo hyperfleet-sentinel \ ---sentinel-image-tag \ ---adapter-image-repo hyperfleet-adapter \ ---adapter-image-tag \ ---api-base-url http://hyperfleet-api:8000 \ ---api-adapters-cluster cl-namespace,cl-maestro,cl-deployment,cl-job \ ---api-adapters-nodepool np-configmap \ ---cluster-tier0-adapters cl-namespace,cl-maestro,cl-deployment,cl-job,cl-invalid-resource,cl-precondition-error \ ---nodepool-tier0-adapters np-configmap + +### 4. Run Pre-Commit Checks + +Before committing, ensure your code passes all checks: + +```bash +# Run all checks (format, lint, unit tests) +make check ``` +### 5. 
Verify Test Behavior + +Ensure your test: +- ✅ Creates resources successfully +- ✅ Waits for expected conditions +- ✅ Cleans up resources (check manually if needed) +- ✅ Passes consistently (run multiple times) +- ✅ Fails appropriately when conditions aren't met + +### 6. Check Test Output + +Review the test output for: +- Clear step descriptions (via `ginkgo.By()`) +- Appropriate timeout values +- Proper error messages on failure + ## Next Steps - **Architecture**: Understand the framework design in [Architecture](architecture.md) diff --git a/docs/getting-started.md b/docs/getting-started.md index 7a4a354..020609f 100644 --- a/docs/getting-started.md +++ b/docs/getting-started.md @@ -5,9 +5,11 @@ New to HyperFleet E2E? This guide will help you run your first test in 10 minute ## Prerequisites - **Go 1.25+** - Required for building the framework -- **HyperFleet API access** - API endpoint URL +- **HyperFleet deployment** - Running HyperFleet API and Maestro instance - **10 minutes** - Time to complete this guide +> **Need to set up a HyperFleet environment first?** See the [Setup Guide](setup.md) for complete instructions using Kind (local) or GCP. + ## Installation ### Clone and Build @@ -28,13 +30,15 @@ You should see the command help output. 
## Your First Test -**Step 1**: Set API URL +**Step 1**: Set required environment variables ```bash -export HYPERFLEET_API_URL=https://api.hyperfleet.example.com +export HYPERFLEET_API_URL= +export MAESTRO_URL= +export NAMESPACE= ``` -**Step 2**: Run tests +**Step 2**: Run tier0 tests ```bash ./bin/hyperfleet-e2e test --label-filter=tier0 @@ -61,6 +65,12 @@ The framework: # Run critical tests only ./bin/hyperfleet-e2e test --label-filter=tier0 +# Run important features +./bin/hyperfleet-e2e test --label-filter=tier1 + +# Run edge cases (requires sourcing env/env.local first) +source env/env.local && ./bin/hyperfleet-e2e test --label-filter=tier2 + # Run all cluster suite tests ./bin/hyperfleet-e2e test --focus "\[Suite: cluster\]" @@ -105,9 +115,13 @@ make generate # Regenerate OpenAPI client **API connection errors**: ```bash -# Verify API URL +# Verify API URLs are set echo $HYPERFLEET_API_URL -curl -I $HYPERFLEET_API_URL +echo $MAESTRO_URL +echo $NAMESPACE + +# Test connectivity +curl -f -X GET ${HYPERFLEET_API_URL}/api/hyperfleet/v1/clusters/ ``` **Test timeouts**: Increase timeouts via environment variables: @@ -115,11 +129,13 @@ curl -I $HYPERFLEET_API_URL HYPERFLEET_TIMEOUTS_CLUSTER_RECONCILED=45m make e2e ``` +**Namespace mismatch**: Ensure `NAMESPACE` matches your deployment namespace. Some tests deploy adapters dynamically and must target the same namespace where HyperFleet components are running. + **Configuration not taking effect**: Priority order (highest to lowest): 1. CLI flags (`--api-url`) -2. Environment variables (`HYPERFLEET_API_URL`) +2. Environment variables (`HYPERFLEET_API_URL`, `MAESTRO_URL`, `NAMESPACE`) 3. Config file (`configs/config.yaml`) 4. Built-in defaults @@ -132,8 +148,11 @@ Priority order (highest to lowest): ./bin/hyperfleet-e2e test --log-level=debug ``` +For more troubleshooting help and environment issues, see the [Runbook](runbook.md#troubleshooting) or [Setup Guide](setup.md). 
+ ## Next Steps +- **[Runbook](runbook.md)** - Running tests and troubleshooting guide - **[Architecture](architecture.md)** - Understand how the framework works - **[Development](development.md)** - Write your own tests - **CLI Reference** - Run `./bin/hyperfleet-e2e --help` diff --git a/docs/local-kind-setup.md b/docs/local-kind-setup.md deleted file mode 100644 index aa4b17d..0000000 --- a/docs/local-kind-setup.md +++ /dev/null @@ -1,106 +0,0 @@ -# Local E2E Testing with kind - -Run E2E tests locally using [kind](https://kind.sigs.k8s.io/) and RabbitMQ — no GCP dependencies. - -## Prerequisites - -- **Go** 1.25+ — [go.dev](https://go.dev/doc/install) -- **Docker** — [docker.com](https://www.docker.com/) or **Podman** — [podman.io](https://podman.io/) -- **kind** — [kind.sigs.k8s.io](https://kind.sigs.k8s.io/docs/user/quick-start/#installation) -- **kubectl** 1.28+ — [kubernetes.io](https://kubernetes.io/docs/tasks/tools/) -- **helm** 3+ — [helm.sh](https://helm.sh/docs/intro/install/) - -## Clone Repositories - -All component repos are required — images are built locally. - -```bash -for repo in hyperfleet-e2e hyperfleet-infra hyperfleet-api hyperfleet-sentinel hyperfleet-adapter; do - git clone https://github.com/openshift-hyperfleet/${repo}.git ~/projects/${repo} -done -``` - -> Repos outside `~/projects`? Set `PROJECTS_DIR` in your `.env` — see [Configuration](#configuration). 
- -## Quick Start - -```bash -# Copy config template -cp deploy-scripts/.env.example deploy-scripts/.env -# Uncomment HYPERFLEET_API_URL and MAESTRO_URL at the bottom - -# One command: cluster + images + deploy + port-forward -make local-up - -# Run tests -make e2e -``` - -For individual steps: - -```bash -./deploy-scripts/kind-local.sh setup # Cluster + RabbitMQ + Maestro + images -./deploy-scripts/kind-local.sh deploy # Deploy API + sentinels + adapters -./deploy-scripts/kind-local.sh port-forward # Forward API (:8000) + Maestro (:8100) -./deploy-scripts/kind-local.sh rebuild # Rebuild all images + restart -./deploy-scripts/kind-local.sh down # Remove everything -``` - -## Rebuilding After Code Changes - -```bash -# Rebuild one component -./deploy-scripts/kind-local.sh rebuild hyperfleet-adapter - -# Force rebuild without cache (after git pull) -./deploy-scripts/kind-local.sh rebuild --no-cache hyperfleet-adapter - -# Rebuild everything -./deploy-scripts/kind-local.sh rebuild --no-cache -``` - -Or via Make: - -```bash -make local-rebuild C=hyperfleet-adapter -make local-rebuild C=hyperfleet-adapter NO_CACHE=1 -``` - -## Running Specific Tests - -With `HYPERFLEET_API_URL` and `MAESTRO_URL` set in `.env`, just run: - -```bash -./bin/hyperfleet-e2e test --focus="\[Suite: cluster\]" --log-level=info -``` - -## Configuration - -All config lives in `deploy-scripts/.env` (gitignored). 
Copy from `.env.example` and uncomment what you need: - -```bash -cp deploy-scripts/.env.example deploy-scripts/.env -``` - -Local kind settings are at the bottom of the file: - -| Variable | Default | Description | -|----------|---------|-------------| -| `PROJECTS_DIR` | `~/projects` | Parent directory containing component repos | -| `INFRA_DIR` | `~/projects/hyperfleet-infra` | Path to hyperfleet-infra repo | -| `KIND_CLUSTER` | `kind` | Kind cluster name | -| `NAMESPACE` | `hyperfleet-local` | Kubernetes namespace | -| `HYPERFLEET_API_URL` | — | API URL for tests (`http://localhost:8000`) | -| `MAESTRO_URL` | — | Maestro URL for tests (`http://localhost:8100`) | - -## Troubleshooting - -**ImagePullBackOff** — Image not loaded into kind. Run `kind load docker-image `. With Podman: `podman save | kind load image-archive /dev/stdin`. - -**db-migrate crashing** — API binary doesn't match Helm chart: `./deploy-scripts/kind-local.sh rebuild --no-cache hyperfleet-api` - -**Container build cache stale** — Use `--no-cache` after `git pull`. - -**Connection refused** — Port-forwards died: `./deploy-scripts/kind-local.sh port-forward` - -**`make local-down`** removes components but leaves kind cluster. Full cleanup: `kind delete cluster`. diff --git a/docs/runbook.md b/docs/runbook.md index 28f7e03..36ca8b0 100644 --- a/docs/runbook.md +++ b/docs/runbook.md @@ -1,282 +1,131 @@ # HyperFleet E2E Test Runbook -> **Audience:** Developers running e2e tests locally +> **Audience:** Developers running E2E tests locally -This runbook provides step-by-step instructions for setting up, running, and troubleshooting HyperFleet E2E tests in a local development environment. +This runbook provides step-by-step instructions for running and troubleshooting HyperFleet E2E tests in a local development environment. 
## Table of Contents - [Prerequisites](#prerequisites) -- [Prepare Test Environment](#prepare-test-environment) -- [Deploy CLM to Your Created GKE Cluster](#deploy-clm-to-your-created-gke-cluster) -- [Running E2E Tests Locally](#running-e2e-tests-locally) -- [Common Failure Modes and Troubleshooting](#common-failure-modes-and-troubleshooting) +- [Running E2E Tests](#running-e2e-tests) +- [Troubleshooting](#troubleshooting) - [Test Coverage in CI](#test-coverage-in-ci) ## Prerequisites -### Required Tools +**Environment Setup:** -The following tools must be installed on your local machine: +Before running tests, you need a running HyperFleet environment. See the [Setup Guide](setup.md) for complete instructions on deploying HyperFleet using: -| Tool | Minimum Version | Purpose | Installation | -|------|----------------|---------|--------------| -| **Go** | 1.25+ | Build and run the E2E framework | [go.dev](https://go.dev/doc/install) | -| **kubectl** | 1.28+ | Interact with Kubernetes clusters | [kubernetes.io](https://kubernetes.io/docs/tasks/tools/) | -| **helm** | 3.0+ | Deploy HyperFleet components | [helm.sh](https://helm.sh/docs/intro/install/) | -| **git** | 2.30+ | Clone repositories and manage Helm charts | [git-scm.com](https://git-scm.com/downloads) | -| **podman** or **docker** | Latest | Build container images (optional) | [podman.io](https://podman.io/) or [docker.com](https://www.docker.com/) | +- **Kind (local):** Fast setup, no cloud dependencies, uses port-forwarding +- **GCP:** Cloud environment, requires GCP access, uses LoadBalancer services +The environment guide covers: +- Tool installation and verification +- HyperFleet deployment (Kind or GCP) +- Port-forwarding / LoadBalancer setup +- Environment variable configuration +- Optional image settings override -### Verify Prerequisites +**Required environment variables** (set during environment setup): -Run these commands to verify your setup: +- `HYPERFLEET_API_URL` - HyperFleet API endpoint +- 
`MAESTRO_URL` - Maestro API endpoint +- `NAMESPACE` - Deployment namespace +- source env/env.local (Optional for tier 2 tests) -```bash -# Check Go version -go version # Should show 1.25 or higher - -# Check kubectl -kubectl version --client - -# Check Helm -helm version - -# Check Git -git --version - -# Check container tool (optional) -podman --version || docker --version -``` - -## Prepare Test Environment - -### Clone and Configure Terraform - -First, clone the infrastructure repository and navigate to the terraform directory: - -```bash -git clone https://github.com/openshift-hyperfleet/hyperfleet-infra/ -cd hyperfleet-infra/terraform -``` - -### Install GKE Cluster - -Run the following Terraform commands to deploy your GKE cluster. - -#### Terraform Commands - -```bash -# Copy and update the terraform variable file -cp envs/gke/dev.tfvars.example envs/gke/dev-.tfvars -# Update the following settings in your tfvars file -# developer_name - set to your name, use_pubsub=false, enable_dead_letter=false - -# Copy and update the terraform backend file -cp envs/gke/dev.tfbackend.example envs/gke/dev-.tfbackend -# update the prefix field with your name - -# Initialize terraform with your backend configuration -terraform init -backend-config=envs/gke/dev-.tfbackend - -# Preview the infrastructure changes -terraform plan -var-file=envs/gke/dev-.tfvars - -# Apply the infrastructure changes -terraform apply -var-file=envs/gke/dev-.tfvars -``` -### Install Maestro - -After deploying the GKE cluster, install Maestro and create a consumer: - -```bash -# Install Maestro -make install-maestro - -# Create Maestro consumer (default: cluster1, test adapter are configured with it) -make create-maestro-consumer MAESTRO_CONSUMER=cluster1 - -# Patch the service type to LoadBalancer to expose a external IP -kubectl patch svc maestro -n maestro -p '{"spec":{"type":"LoadBalancer"}}' -``` - -### Login to Cluster - -After the deployment completes, log in to the cluster locally using the 
output command (replace your name): - -```bash -gcloud container clusters get-credentials hyperfleet-dev- --zone us-central1-a --project hcm-hyperfleet -``` - -## Deploy CLM to Your Created GKE Cluster - -### Clone the Repository - -```bash -git clone https://github.com/openshift-hyperfleet/hyperfleet-e2e.git -cd hyperfleet-e2e -``` - -### Deploy HyperFleet Components - -The E2E tests require a running HyperFleet environment (API, Sentinel, and Adapters). - -```bash -# 1. Copy the example configuration -cd deploy-scripts/ -cp .env.example .env - -# 2. Edit .env with your settings -vim .env -source .env - -# 3. Deploy with custom configuration -./deploy-clm.sh --action install --namespace "${NAMESPACE}" - -``` - -**Key Configuration Parameters** (in `.env`): - -```bash -# GCP configuration (required for Pub/Sub) -export GCP_PROJECT_ID="${GCP_PROJECT_ID:-hcm-hyperfleet}" - -# Image configuration (optional - defaults to latest) -export API_IMAGE_TAG="${API_IMAGE_TAG:-latest}" -export SENTINEL_IMAGE_TAG="${SENTINEL_IMAGE_TAG:-latest}" -export ADAPTER_IMAGE_TAG="${ADAPTER_IMAGE_TAG:-latest}" - -# Adapters to deploy (optional) -export CLUSTER_TIER0_ADAPTERS_DEPLOYMENT="${CLUSTER_TIER0_ADAPTERS_DEPLOYMENT:-cl-namespace,cl-job,cl-deployment,cl-maestro}" -export NODEPOOL_TIER0_ADAPTERS_DEPLOYMENT="${NODEPOOL_TIER0_ADAPTERS_DEPLOYMENT:-np-configmap}" - -# Adapters for API cluster/nodepool configuration -export API_ADAPTERS_CLUSTER="${API_ADAPTERS_CLUSTER:-cl-namespace,cl-job,cl-deployment,cl-maestro}" -export API_ADAPTERS_NODEPOOL="${API_ADAPTERS_NODEPOOL:-np-configmap}" - - -# NAMESPACE must be unique to prevent GCP Pub/Sub topic/subscription collisions. 
-# Set in the .env.example file as: -export NAMESPACE="${NAMESPACE:-hyperfleet-e2e-$(echo ${USER:-default} | tr '[:upper:]' '[:lower:]')}" -# Or can manually set it with as the namespace is DNS-1123 compliant -export NAMESPACE= - -``` +## Running E2E Tests -#### Verify Deployment +### Build the E2E Binary ```bash -# Check Helm releases -helm list -n "${NAMESPACE}" - -# Verify all pods are running -kubectl get pods -n "${NAMESPACE}" - -# Check pod logs if any issues -kubectl logs -n "${NAMESPACE}" -``` - -**Expected State**: All pods should show status `Running` with `READY 1/1`. - - -## Running E2E Tests Locally - -### Build the E2E Framework - -```bash -# Generate API client from OpenAPI spec -make generate - -# Build the hyperfleet-e2e binary +# Generate API client from OpenAPI spec and build make build # Verify the build ./bin/hyperfleet-e2e --help ``` -### Configure API Access -If the Maestro and Hyperfleet API services are not exposed via LoadBalancer, you'll need to port-forward them locally: +### Run Tests -```bash -# Terminal 1 - Port-forward Maestro API (local port 8000) -kubectl port-forward -n maestro svc/maestro 8000:8000 +Make sure you've set the required environment variables from the [Prerequisites](#prerequisites) section: -# Terminal 2 - Port-forward Hyperfleet API (local port 8001) -kubectl port-forward -n ${NAMESPACE} svc/hyperfleet-api 8001:8000 -``` +- `HYPERFLEET_API_URL` +- `MAESTRO_URL` +- `NAMESPACE` -Then configure your environment variables: +**Run tests by tier:** ```bash -export MAESTRO_URL=http://localhost:8000 -export HYPERFLEET_API_URL=http://localhost:8001 +# Run tier0 tests (critical path) +./bin/hyperfleet-e2e test --label-filter=tier0 + +# Run tier1 tests (important features) +./bin/hyperfleet-e2e test --label-filter=tier1 + +# Run tier2 tests (edge cases - requires sourcing env/env.local first) +source env/env.local && ./bin/hyperfleet-e2e test --label-filter=tier2 ``` -### Basic Test Execution +**Run tests by suite:** ```bash 
-# Run tests with specific label -./bin/hyperfleet-e2e test --label-filter=tier0 - -# Run tests for specific suite +# Run all cluster tests ./bin/hyperfleet-e2e test --focus "\[Suite: cluster\]" -# Run specific test by description -./bin/hyperfleet-e2e test --focus "Create Cluster via API" +# Run all nodepool tests +./bin/hyperfleet-e2e test --focus "\[Suite: nodepool\]" +# Run all adapter tests +./bin/hyperfleet-e2e test --focus "\[Suite: adapter\]" ``` -**Example:** +**Run specific tests by description:** ```bash -# Using environment variable -export HYPERFLEET_API_URL= -export MAESTRO_URL= -export NAMESPACE= -# Run all tier0 cases -./bin/hyperfleet-e2e test --label-filter=tier0 - -# Run all tier1 cases -./bin/hyperfleet-e2e test --label-filter=tier1 +./bin/hyperfleet-e2e test --focus "Create Cluster via API" ``` -### View All Options +**View available options:** ```bash -# Show all available commands +# Show all commands ./bin/hyperfleet-e2e --help # Show test command options ./bin/hyperfleet-e2e test --help ``` -## Common Failure Modes and Troubleshooting +## Troubleshooting -### Tools and Tips +### Debugging Tools -The following tools are available to help debug and interact with HyperFleet components: +The following tools can help debug and interact with HyperFleet components: | Tool | Purpose | Link | |------|---------|------| -| **Hyperfleet Explorer** | View cluster/nodepool API responses | [https://github.com/rh-amarin/hyperfleet-explorer](https://github.com/rh-amarin/hyperfleet-explorer) | -| **Scripts** | Interact with various component APIs and perform operations | [https://github.com/rh-amarin/hyperfleet-scripts](https://github.com/rh-amarin/hyperfleet-scripts) | -| **k9s** | Kubernetes CLI to manage your clusters in style! 
| [https://k9scli.io/](https://k9scli.io/) | +| **HyperFleet Explorer** | View cluster/nodepool API responses in a UI | [hyperfleet-explorer](https://github.com/rh-amarin/hyperfleet-explorer) | +| **HyperFleet Scripts** | Interact with component APIs and perform operations | [hyperfleet-scripts](https://github.com/openshift-hyperfleet/hyperfleet-scripts) | +| **k9s** | Kubernetes CLI to manage clusters | [k9scli.io](https://k9scli.io/) | + +### Common Issues + +#### 1. Namespace Mismatch -### General Troubleshooting +**Problem:** Tests fail to find adapters or create resources. -#### Namespace Configuration +**Cause:** The `NAMESPACE` environment variable doesn't match the deployment namespace. Some tests deploy adapters dynamically and must target the same namespace where HyperFleet components are running. -**Important:** Set the `NAMESPACE` environment variable to match the namespace used during deployment. Some test cases deploy adapters dynamically and need to target the same namespace where your HyperFleet components are running. +**Solution:** ```bash -# Set NAMESPACE if you deployed to a unique namespace -export NAMESPACE= +export NAMESPACE= ./bin/hyperfleet-e2e test --label-filter=tier0 ``` -#### Timeout Errors +#### 2. Timeout Errors -If you encounter timeout errors like this: +**Problem:** Test failures with timeout errors: ``` [FAILED] cluster creation failed @@ -285,14 +134,16 @@ Unexpected error: context deadline exceeded (Client.Timeout exceeded while awaiting headers) ``` -**Troubleshooting steps:** +**Solution:** + +1. **Verify all pods are running:** -1. **Check if all pods are running:** ```bash kubectl get pods -n ${NAMESPACE} ``` - Expected output - all pods should show `Running` with `READY 1/1`: + Expected output — all pods should show `Running` with `READY 1/1`: + ``` NAME READY STATUS RESTARTS AGE hyperfleet-api-xxx 1/1 Running 0 10m @@ -302,67 +153,111 @@ Unexpected error: ``` 2. 
**Check pod logs for errors:** + ```bash - # Check API logs + # API logs kubectl logs -n ${NAMESPACE} deployment/hyperfleet-api --tail=50 - # Check Sentinel logs + # Sentinel logs kubectl logs -n ${NAMESPACE} deployment/hyperfleet-sentinel --tail=50 - # Check adapter logs + # Adapter logs (example) kubectl logs -n ${NAMESPACE} deployment/cl-namespace-adapter --tail=50 ``` -3. **Verify API connectivity:** +3. **Test API connectivity:** + ```bash - # Test API endpoint curl -f -X GET ${HYPERFLEET_API_URL}/api/hyperfleet/v1/clusters/ ``` + Expected: HTTP 200 response with JSON + 4. **Check service endpoints:** + ```bash - # Verify LoadBalancer has external IP + # For GCP deployments - verify LoadBalancer has external IP + kubectl get svc -n ${NAMESPACE} hyperfleet-api + + # For Kind deployments - verify port-forwarding is active + lsof -i :${API_LOCAL_PORT} + ``` + +#### 3. Image Pull Errors + +**Problem:** Pods stuck in `ImagePullBackOff` or `ErrImagePull` status. + +**Solution:** + +1. Check if `env/env.local` image settings match your infra deployment. See [Configure Test Settings](setup.md#configure-test-settings) in the Setup guide for how to override image settings. +2. Verify image registry credentials are configured in your cluster +3. Check pod events: + + ```bash + kubectl describe pod -n ${NAMESPACE} + ``` + +#### 4. Port-Forward Connection Refused (Kind) + +**Problem:** Tests fail with "connection refused" when using Kind. + +**Solution:** + +1. Verify port-forward processes are running: + + ```bash + ps aux | grep "port-forward" + ``` + +2. Restart port-forwarding in separate terminals (see [Kind setup](setup.md#option-1-kind-local) in the Setup guide) + +3. Verify services exist: + + ```bash + kubectl get svc -n maestro maestro kubectl get svc -n ${NAMESPACE} hyperfleet-api ``` ## Test Coverage in CI -### How Your Tests Run in CI +Tests you run locally are automatically executed in nightly CI jobs for continuous validation. 
-The test cases you run locally are automatically picked up and executed in nightly CI jobs to ensure continuous validation of the system. +### CI Jobs -**Job Configuration File:** All job definitions can be found in the [openshift-hyperfleet-hyperfleet-e2e-main__e2e.yaml](https://github.com/openshift/release/blob/main/ci-operator/config/openshift-hyperfleet/hyperfleet-e2e/openshift-hyperfleet-hyperfleet-e2e-main__e2e.yaml) configuration file. +**Job Configuration:** [openshift-hyperfleet-hyperfleet-e2e-main__e2e.yaml](https://github.com/openshift/release/blob/main/ci-operator/config/openshift-hyperfleet/hyperfleet-e2e/openshift-hyperfleet-hyperfleet-e2e-main__e2e.yaml) | Job Name | Test Tier | Schedule | Description | |----------|-----------|----------|-------------| -| **tier0-nightly** | tier0 | Daily | Runs basic smoke tests and happy critical path validations | -| **tier1-nightly** | tier1 | Daily | Runs extended test suite | +| **tier0-nightly** | tier0 | Daily | Critical path smoke tests | +| **tier1-nightly** | tier1 | Daily | Extended test suite | -### Job Configuration and Management +### Managing CI Jobs -For comprehensive information about CI jobs, see the [Add HyperFleet E2E CI Job in Prow](https://github.com/openshift-hyperfleet/architecture/blob/main/hyperfleet/docs/test-release/add-hyperfleet-e2e-ci-job-in-prow.md) documentation, which covers: +For details on CI job configuration, viewing results, manual triggering, and debugging failures, see: -- How CI jobs are configured in Prow -- Viewing job results -- Triggering jobs manually -- Debugging job failures +- [Add HyperFleet E2E CI Job in Prow](https://github.com/openshift-hyperfleet/architecture/blob/main/hyperfleet/docs/test-release/add-hyperfleet-e2e-ci-job-in-prow.md) + +--- ## Changelog -All notable changes to this document will be documented in this section. +All notable changes to this document are documented below. + +Format based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). 
+ +### 2026-06-10 -The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). +#### Changed +- Restructured runbook for clarity and consistency +- Moved Kind and GCP setup into separate sections of setup.md with clearer step-by-step instructions +- Improved troubleshooting section with numbered common issues and solutions +- Streamlined test execution instructions with better examples +- Cleaned up formatting and removed duplicate content ### 2026-03-30 #### Added - Initial runbook with prerequisites, environment setup, test execution, troubleshooting, and CI coverage sections -- Prerequisites section with required tools and verification steps -- Prepare Test Environment section with Terraform and GKE cluster setup -- Deploy CLM section with HyperFleet component deployment instructions -- Running E2E Tests Locally section with build and execution commands -- Common Failure Modes and Troubleshooting section with debugging tools and tips -- Test Coverage in CI section documenting nightly jobs and Prow integration diff --git a/docs/setup.md b/docs/setup.md new file mode 100644 index 0000000..b84ab5e --- /dev/null +++ b/docs/setup.md @@ -0,0 +1,150 @@ +# Setup Guide + +This guide covers setting up a HyperFleet environment for running E2E tests locally. 
+ +## Table of Contents + +- [Deployment Options](#deployment-options) + - [Option 1: Kind (Local)](#option-1-kind-local) + - [Option 2: GCP](#option-2-gcp) +- [Configure Test Settings](#configure-test-settings) +- [Troubleshooting](#troubleshooting) + +## Deployment Options + +Clone the infrastructure repository: + +```bash +git clone https://github.com/openshift-hyperfleet/hyperfleet-infra/ +cd hyperfleet-infra/terraform +``` + +Choose one of the following deployment options based on your needs: + +- **Kind (local):** Fast setup, no cloud dependencies, uses port-forwarding +- **GCP:** Cloud environment, requires GCP access, slower setup, uses LoadBalancer services + +### Option 1: Kind (Local) + +**1. Deploy HyperFleet to Kind cluster:** + +```bash +export NAMESPACE= +NAMESPACE=${NAMESPACE} HELMFILE_ENV=e2e-kind make local-up-kind +``` + +**2. Set up port-forwarding in two separate terminals:** + +```bash +# Terminal 1 - Port-forward Maestro API +export MAESTRO_LOCAL_PORT=8100 +kubectl port-forward -n maestro svc/maestro ${MAESTRO_LOCAL_PORT}:8000 + +# Terminal 2 - Port-forward HyperFleet API +export API_LOCAL_PORT=8000 +kubectl port-forward -n ${NAMESPACE} svc/hyperfleet-api ${API_LOCAL_PORT}:8000 +``` + +**3. Configure environment variables:** + +```bash +export MAESTRO_URL=http://localhost:${MAESTRO_LOCAL_PORT} +export HYPERFLEET_API_URL=http://localhost:${API_LOCAL_PORT} +export NAMESPACE= +``` + +**4. Verify deployment:** + +```bash +# Check Helm releases +helm list -n ${NAMESPACE} + +# Verify all pods are running +kubectl get pods -n ${NAMESPACE} + +# Test API connectivity +curl -f -X GET ${HYPERFLEET_API_URL}/api/hyperfleet/v1/clusters/ +``` + +### Option 2: GCP + +**1. Deploy HyperFleet to GCP cluster:** + +```bash +export NAMESPACE= +NAMESPACE=${NAMESPACE} HELMFILE_ENV=e2e-gcp make local-up-gcp +``` + +**2. 
Expose Maestro service via LoadBalancer:** + +```bash +# Patch Maestro service to expose external IP +kubectl patch svc maestro -n maestro -p '{"spec":{"type":"LoadBalancer"}}' + +# Wait for external IPs to be assigned (may take 1-2 minutes) +kubectl get svc maestro -n maestro -w +kubectl get svc hyperfleet-api -n ${NAMESPACE} -w +``` + +**3. Configure environment variables:** + +```bash +export API_EXTERNAL_IP=$(kubectl get svc hyperfleet-api -n ${NAMESPACE} -o jsonpath='{.status.loadBalancer.ingress[0].ip}') +export MAESTRO_EXTERNAL_IP=$(kubectl get svc maestro -n maestro -o jsonpath='{.status.loadBalancer.ingress[0].ip}') +export HYPERFLEET_API_URL=http://${API_EXTERNAL_IP}:8000 +export MAESTRO_URL=http://${MAESTRO_EXTERNAL_IP}:8000 +export NAMESPACE=<your-namespace> +``` + +**4. Verify deployment:** + +```bash +# Check Helm releases +helm list -n ${NAMESPACE} + +# Verify all pods are running +kubectl get pods -n ${NAMESPACE} + +# Test API connectivity +curl -f -X GET ${HYPERFLEET_API_URL}/api/hyperfleet/v1/clusters/ +``` + +## Configure Test Settings + +### Override Image Settings (Optional) + +If your deployment uses custom image settings, update `env/env.local` in this repo to match your infrastructure deployment settings: + +- **Kind deployments:** Match settings from [`infra/terraform/env.kind`](https://github.com/openshift-hyperfleet/hyperfleet-infra/blob/main/env.kind#L18-L21) +- **GCP deployments:** Match settings from [`infra/terraform/env.gcp`](https://github.com/openshift-hyperfleet/hyperfleet-infra/blob/main/env.gcp#L18-L21) + +**Update `env/env.local`:** + +```bash +# env/env.local +IMAGE_REGISTRY=<registry> +<COMPONENT>_IMAGE_REPO=<repository> +<COMPONENT>_IMAGE_TAG=<tag> +``` + +**Source the configuration:** + +```bash +source env/env.local +``` + +This configuration is required for running tier2 tests. 
+ +## Troubleshooting + +### Infrastructure Setup Issues + +For additional help with infrastructure deployment and configuration, see: + +- [hyperfleet-infra README](https://github.com/openshift-hyperfleet/hyperfleet-infra/blob/main/README.md) - Main infrastructure documentation + +For test-specific troubleshooting (timeouts, API errors, namespace mismatches), see the [Runbook Troubleshooting](runbook.md#troubleshooting) section. + +--- + +**Next Steps:** Once your environment is set up, see the [Runbook](runbook.md) for running tests and troubleshooting. diff --git a/env/env.local b/env/env.local new file mode 100644 index 0000000..84e331c --- /dev/null +++ b/env/env.local @@ -0,0 +1,60 @@ +# ============================================================================ +# NOTE: Local development environment configuration +# Can be used to run E2E tests locally against Kind or GCP clusters +# Can be modified without testing the changes in the E2E pipeline +# ============================================================================ + + +# Image Registry Configuration +export IMAGE_REGISTRY="${IMAGE_REGISTRY:-registry.ci.openshift.org}" + +# API Component Configuration +export API_IMAGE_REPO="${API_IMAGE_REPO:-ci/hyperfleet-api}" +export API_IMAGE_TAG="${API_IMAGE_TAG:-latest}" +export API_SERVICE_TYPE="${API_SERVICE_TYPE:-LoadBalancer}" + + +# ===================================================================== +# API Adapter Configuration +# ===================================================================== +# NOTE: These are SEPARATE from tier0 deployment configuration above +# These should be set based on specific test case requirements +# Set per test case as needed + +# Adapters for API cluster configuration +export API_ADAPTERS_CLUSTER="${API_ADAPTERS_CLUSTER:-cl-namespace,cl-job,cl-deployment,cl-maestro}" + +# Adapters for API nodepool configuration +export API_ADAPTERS_NODEPOOL="${API_ADAPTERS_NODEPOOL:-np-configmap}" + +# Sentinel Component Configuration 
+export SENTINEL_IMAGE_REPO="${SENTINEL_IMAGE_REPO:-ci/hyperfleet-sentinel}" +export SENTINEL_IMAGE_TAG="${SENTINEL_IMAGE_TAG:-latest}" +export SENTINEL_BROKER_TYPE="${SENTINEL_BROKER_TYPE:-googlepubsub}" +export SENTINEL_GOOGLEPUBSUB_CREATE_TOPIC_IF_MISSING="${SENTINEL_GOOGLEPUBSUB_CREATE_TOPIC_IF_MISSING:-true}" + +# Adapter Component Configuration +export ADAPTER_IMAGE_REPO="${ADAPTER_IMAGE_REPO:-ci/hyperfleet-adapter}" +export ADAPTER_IMAGE_TAG="${ADAPTER_IMAGE_TAG:-latest}" +export ADAPTER_GOOGLEPUBSUB_CREATE_TOPIC_IF_MISSING="${ADAPTER_GOOGLEPUBSUB_CREATE_TOPIC_IF_MISSING:-true}" +export ADAPTER_GOOGLEPUBSUB_CREATE_SUBSCRIPTION_IF_MISSING="${ADAPTER_GOOGLEPUBSUB_CREATE_SUBSCRIPTION_IF_MISSING:-true}" + + +# Helm Chart Sources +# Note: ADAPTER_CHART_* and API_CHART_* vars are also required by tier2 E2E tests +# (e.g., crash recovery) which deploy dedicated adapters and upgrade API config at runtime. +# When running tier2 tests in CI, ensure these are exported alongside GINKGO_LABEL_FILTER=tier2. 
+export API_CHART_REPO="${API_CHART_REPO:-https://github.com/openshift-hyperfleet/hyperfleet-api.git}" +export API_CHART_REF="${API_CHART_REF:-main}" +export API_CHART_PATH="${API_CHART_PATH:-charts}" + +export SENTINEL_CHART_REPO="${SENTINEL_CHART_REPO:-https://github.com/openshift-hyperfleet/hyperfleet-sentinel.git}" +export SENTINEL_CHART_REF="${SENTINEL_CHART_REF:-main}" +export SENTINEL_CHART_PATH="${SENTINEL_CHART_PATH:-charts}" + +export ADAPTER_CHART_REPO="${ADAPTER_CHART_REPO:-https://github.com/openshift-hyperfleet/hyperfleet-adapter.git}" +export ADAPTER_CHART_REF="${ADAPTER_CHART_REF:-main}" +export ADAPTER_CHART_PATH="${ADAPTER_CHART_PATH:-charts}" + + +export GCP_PROJECT_ID="${GCP_PROJECT_ID:-hcm-hyperfleet}" \ No newline at end of file diff --git a/testdata/adapter-configs/cl-deployment/adapter-config.yaml b/testdata/adapter-configs/cl-deployment/adapter-config.yaml deleted file mode 100644 index d20254b..0000000 --- a/testdata/adapter-configs/cl-deployment/adapter-config.yaml +++ /dev/null @@ -1,23 +0,0 @@ -adapter: - name: cl-deployment - #version: "0.1.0" - -# Log the full merged configuration after load (default: false) -debug_config: false -log: - level: debug - -clients: - hyperfleet_api: - base_url: CHANGE_ME - version: v1 - timeout: 2s - retry_attempts: 3 - retry_backoff: exponential - - broker: - subscription_id: CHANGE_ME - topic: CHANGE_ME - - kubernetes: - api_version: "v1" diff --git a/testdata/adapter-configs/cl-deployment/adapter-task-config.yaml b/testdata/adapter-configs/cl-deployment/adapter-task-config.yaml deleted file mode 100644 index 406ae4e..0000000 --- a/testdata/adapter-configs/cl-deployment/adapter-task-config.yaml +++ /dev/null @@ -1,177 +0,0 @@ -# Simple valid HyperFleet Adapter Configuration for testing - -# Parameters with all required variables -params: - - name: "clusterId" - source: "event.id" - type: "string" - required: true - -# Preconditions with valid operators and CEL expressions -preconditions: - - name: 
"clusterStatus" - api_call: - method: "GET" - url: "/clusters/{{ .clusterId }}" - timeout: 10s - retry_attempts: 3 - retry_backoff: "exponential" - capture: - - name: "clusterName" - field: "name" - - name: "generationSpec" - field: "generation" - - name: "is_deleting" - expression: "has(clusterStatus.deleted_time)" - - name: "clusterNotReconciled" - expression: | - status.conditions.filter(c, c.type == "Reconciled").size() > 0 - ? status.conditions.filter(c, c.type == "Reconciled")[0].status != "True" - : true - - name: "clusterReconciledTTL" - expression: | - (timestamp(now()) - timestamp( - status.conditions.filter(c, c.type == "Reconciled").size() > 0 - ? status.conditions.filter(c, c.type == "Reconciled")[0].last_transition_time - : now() - )).getSeconds() > 300 - - - name: "clusterAdapterStatus" - api_call: - method: "GET" - url: "/clusters/{{ .clusterId }}/statuses" - timeout: 10s - retry_attempts: 3 - retry_backoff: "exponential" - capture: - - name: "clusterJobStatus" - field: "{.items[?(@.adapter=='cl-job')].conditions[?(@.type=='Available')].status}" - - - name: "validationCheck" - expression: | - is_deleting || (clusterJobStatus == "True" && (clusterNotReconciled || clusterReconciledTTL)) - -# Resources with valid K8s manifests -resources: - - name: "testDeployment" - transport: - client: "kubernetes" - manifest: - ref: "/etc/adapter/deployment.yaml" - discovery: - namespace: "{{ .clusterId }}" - by_selectors: - label_selector: - hyperfleet.io/resource-type: "deployment" - hyperfleet.io/cluster-id: "{{ .clusterId }}" - lifecycle: - delete: - propagationPolicy: Background - when: - expression: "is_deleting" - -# Post-processing with valid CEL expressions -post: - payloads: - - name: "clusterStatusPayload" - build: - adapter: "{{ .adapter.name }}" - conditions: - # Applied: Deployment successfully created - - type: "Applied" - status: - expression: | - has(resources.testDeployment) ? 
"True" : "False" - reason: - expression: | - has(resources.testDeployment) - ? "DeploymentApplied" - : "DeploymentPending" - message: - expression: | - has(resources.testDeployment) - ? "testDeployment manifest applied successfully" - : "testDeployment is pending to be applied" - # Available: Check deployment status conditions - - type: "Available" - status: - expression: | - has(resources.testDeployment) ? - ( resources.?testDeployment.?status.?conditions.orValue([]).exists(c, c.type == "Available") - ? resources.testDeployment.status.conditions.filter(c, c.type == "Available")[0].status : "False") - : "False" - reason: - expression: | - resources.?testDeployment.?status.?conditions.orValue([]).exists(c, c.type == "Available") - ? resources.testDeployment.status.conditions.filter(c, c.type == "Available")[0].reason - : resources.?testDeployment.?status.?conditions.orValue([]).exists(c, c.type == "Progressing" && c.status == "False") ? "DeploymentFailed" - : resources.?testDeployment.?status.hasValue() ? "DeploymentInProgress" : "DeploymentPending" - message: - expression: | - resources.?testDeployment.?status.?conditions.orValue([]).exists(c, c.type == "Available") - ? resources.testDeployment.status.conditions.filter(c, c.type == "Available")[0].message - : resources.?testDeployment.?status.?conditions.orValue([]).exists(c, c.type == "Progressing" && c.status == "False") ? "Deployment failed" - : resources.?testDeployment.?status.hasValue() ? "Deployment in progress" : "Deployment is pending" - # Health: Adapter execution status (runtime) - - type: "Health" - status: - expression: | - adapter.?executionStatus.orValue("") == "success" - && !adapter.?resourcesSkipped.orValue(false) - ? "True" - : "False" - reason: - expression: | - adapter.?executionStatus.orValue("") != "success" - ? "ExecutionFailed:" + adapter.?executionError.?phase.orValue("unknown") - : adapter.?resourcesSkipped.orValue(false) - ? 
"ResourcesSkipped" - : "Healthy" - message: - expression: | - adapter.?executionStatus.orValue("") != "success" - ? "Adapter failed at phase [" - + adapter.?executionError.?phase.orValue("unknown") - + "] step [" - + adapter.?executionError.?step.orValue("unknown") - + "]: " - + adapter.?executionError.?message.orValue(adapter.?errorMessage.orValue("no details")) - : adapter.?resourcesSkipped.orValue(false) - ? "Resources skipped: " + adapter.?skipReason.orValue("unknown reason") - : "Adapter execution completed successfully" - # Finalized: True once deployment is confirmed deleted - - type: "Finalized" - status: - expression: | - is_deleting - && adapter.?executionStatus.orValue("") == "success" - && !adapter.?resourcesSkipped.orValue(false) - && !resources.?testDeployment.hasValue() - ? "True" - : "False" - reason: - expression: | - !is_deleting ? "NotDeleting" - : !resources.?testDeployment.hasValue() - ? "CleanupConfirmed" - : "CleanupInProgress" - message: - expression: | - !is_deleting ? "Resource not marked for deletion" - : !resources.?testDeployment.hasValue() - ? 
"All resources deleted; cleanup confirmed" - : "Deletion in progress; waiting for deployment to be removed" - # Event generation ID metadata field needs to use expression to avoid interpolation issues - observed_generation: - expression: "generationSpec" - observed_time: '{{ now | date "2006-01-02T15:04:05Z07:00" }}' - - post_actions: - - name: "reportClusterStatus" - api_call: - method: "PUT" - url: "/clusters/{{ .clusterId }}/statuses" - headers: - - name: "Content-Type" - value: "application/json" - body: "{{ .clusterStatusPayload }}" diff --git a/testdata/adapter-configs/cl-deployment/adapter-task-resource-deployment.yaml b/testdata/adapter-configs/cl-deployment/adapter-task-resource-deployment.yaml deleted file mode 100644 index 6f66b37..0000000 --- a/testdata/adapter-configs/cl-deployment/adapter-task-resource-deployment.yaml +++ /dev/null @@ -1,29 +0,0 @@ -# Test deployment template -apiVersion: apps/v1 -kind: Deployment -metadata: - name: "test-nginx-{{ .clusterId }}" - namespace: "{{ .clusterId }}" - labels: - hyperfleet.io/cluster-id: "{{ .clusterId }}" - hyperfleet.io/resource-type: "deployment" - annotations: - hyperfleet.io/generation: "{{ .generationSpec }}" -spec: - replicas: 1 - selector: - matchLabels: - app: test - hyperfleet.io/cluster-id: "{{ .clusterId }}" - template: - metadata: - labels: - app: test - hyperfleet.io/cluster-id: "{{ .clusterId }}" - spec: - containers: - - name: test - image: nginx:latest - ports: - - containerPort: 80 - diff --git a/testdata/adapter-configs/cl-deployment/values.yaml b/testdata/adapter-configs/cl-deployment/values.yaml deleted file mode 100644 index 994cbc3..0000000 --- a/testdata/adapter-configs/cl-deployment/values.yaml +++ /dev/null @@ -1,32 +0,0 @@ -adapterConfig: - create: true - files: - adapter-config.yaml: cl-deployment/adapter-config.yaml - log: - level: debug - -adapterTaskConfig: - create: true - files: - task-config.yaml: cl-deployment/adapter-task-config.yaml - deployment.yaml: 
cl-deployment/adapter-task-resource-deployment.yaml - -broker: - type: googlepubsub - create: true - googlepubsub: - projectId: CHANGE_ME - subscriptionId: CHANGE_ME - topic: CHANGE_ME - deadLetterTopic: CHANGE_ME - -image: - registry: CHANGE_ME - repository: ci/hyperfleet-adapter - pullPolicy: Always - tag: latest - -rbac: - resources: - - deployments - - deployments/status diff --git a/testdata/adapter-configs/cl-job/adapter-config.yaml b/testdata/adapter-configs/cl-job/adapter-config.yaml deleted file mode 100644 index 88098e0..0000000 --- a/testdata/adapter-configs/cl-job/adapter-config.yaml +++ /dev/null @@ -1,23 +0,0 @@ -adapter: - name: cl-job - #version: "0.1.0" - -# Log the full merged configuration after load (default: false) -debug_config: false -log: - level: debug - -clients: - hyperfleet_api: - base_url: CHANGE_ME - version: v1 - timeout: 2s - retry_attempts: 3 - retry_backoff: exponential - - broker: - subscription_id: CHANGE_ME - topic: CHANGE_ME - - kubernetes: - api_version: "v1" diff --git a/testdata/adapter-configs/cl-job/adapter-task-config.yaml b/testdata/adapter-configs/cl-job/adapter-task-config.yaml deleted file mode 100644 index a59aac1..0000000 --- a/testdata/adapter-configs/cl-job/adapter-task-config.yaml +++ /dev/null @@ -1,186 +0,0 @@ -# Simple valid HyperFleet Adapter Configuration for testing - -# Parameters with all required variables -params: - - name: "clusterId" - source: "event.id" - type: "string" - required: true - -# Preconditions with valid operators and CEL expressions -preconditions: - - name: "clusterStatus" - api_call: - method: "GET" - url: "/clusters/{{ .clusterId }}" - timeout: 10s - retry_attempts: 3 - retry_backoff: "exponential" - capture: - - name: "clusterName" - field: "name" - - name: "generationSpec" - field: "generation" - - name: "is_deleting" - expression: "has(clusterStatus.deleted_time)" - - name: "clusterNotReconciled" - expression: | - status.conditions.filter(c, c.type == "Reconciled").size() > 0 - ? 
status.conditions.filter(c, c.type == "Reconciled")[0].status != "True" - : true - - name: "clusterReconciledTTL" - expression: | - (timestamp(now()) - timestamp( - status.conditions.filter(c, c.type == "Reconciled").size() > 0 - ? status.conditions.filter(c, c.type == "Reconciled")[0].last_transition_time - : now() - )).getSeconds() > 300 - - - name: "clusterAdapterStatus" - api_call: - method: "GET" - url: "/clusters/{{ .clusterId }}/statuses" - timeout: 10s - retry_attempts: 3 - retry_backoff: "exponential" - capture: - - name: "clusterNamespaceStatus" - field: "{.items[?(@.adapter=='cl-namespace')].data.namespace.status}" - - - name: "validationCheck" - expression: | - is_deleting || (clusterNamespaceStatus == "Active" && (clusterNotReconciled || clusterReconciledTTL)) - -# Resources with valid K8s manifests -resources: - # Hello-world job created in the cluster namespace - - name: "testJob" - transport: - client: "kubernetes" - manifest: - ref: "/etc/adapter/job.yaml" - discovery: - namespace: "{{ .clusterId }}" - by_selectors: - label_selector: - hyperfleet.io/cluster-id: "{{ .clusterId }}" - hyperfleet.io/resource-type: "job" - lifecycle: - delete: - propagationPolicy: Background - when: - expression: "is_deleting" - -# Post-processing with valid CEL expressions -# Reports Applied, Available, and Health conditions for the hello-world job -post: - payloads: - - name: "clusterStatusPayload" - build: - adapter: "{{ .adapter.name }}" - conditions: - # Applied: Job successfully created - - type: "Applied" - status: - expression: | - has(resources.testJob) ? "True" : "False" - reason: - expression: | - has(resources.testJob) - ? "JobApplied" - : "JobPending" - message: - expression: | - has(resources.testJob) - ? "testJob manifest applied successfully" - : "testJob is pending to be applied" - # Available: True when the job has completed successfully - - type: "Available" - status: - expression: | - has(resources.testJob) ? 
- ( resources.?testJob.?status.?conditions.orValue([]).exists(c, c.type == "Complete") - ? resources.testJob.status.conditions.filter(c, c.type == "Complete")[0].status - : "False") - : "False" - reason: - expression: | - resources.?testJob.?status.?conditions.orValue([]).exists(c, c.type == "Complete") - ? "JobComplete" - : resources.?testJob.?status.?conditions.orValue([]).exists(c, c.type == "Failed") - ? "JobFailed" - : resources.?testJob.?status.hasValue() - ? "JobRunning" - : "JobPending" - message: - expression: | - resources.?testJob.?status.?conditions.orValue([]).exists(c, c.type == "Complete") - ? "Hello-world job completed successfully" - : resources.?testJob.?status.?conditions.orValue([]).exists(c, c.type == "Failed") - ? "Hello-world job failed" - : resources.?testJob.?status.hasValue() - ? "Hello-world job is running" - : "Hello-world job is pending" - # Health: Adapter execution status (runtime) - - type: "Health" - status: - expression: | - adapter.?executionStatus.orValue("") == "success" - && !adapter.?resourcesSkipped.orValue(false) - ? "True" - : "False" - reason: - expression: | - adapter.?executionStatus.orValue("") != "success" - ? "ExecutionFailed:" + adapter.?executionError.?phase.orValue("unknown") - : adapter.?resourcesSkipped.orValue(false) - ? "ResourcesSkipped" - : "Healthy" - message: - expression: | - adapter.?executionStatus.orValue("") != "success" - ? "Adapter failed at phase [" - + adapter.?executionError.?phase.orValue("unknown") - + "] step [" - + adapter.?executionError.?step.orValue("unknown") - + "]: " - + adapter.?executionError.?message.orValue(adapter.?errorMessage.orValue("no details")) - : adapter.?resourcesSkipped.orValue(false) - ? 
"Resources skipped: " + adapter.?skipReason.orValue("unknown reason") - : "Adapter execution completed successfully" - # Finalized: True once all job resources are confirmed deleted - - type: "Finalized" - status: - expression: | - is_deleting - && adapter.?executionStatus.orValue("") == "success" - && !adapter.?resourcesSkipped.orValue(false) - && !resources.?testJob.hasValue() - ? "True" - : "False" - reason: - expression: | - !is_deleting ? "NotDeleting" - : !resources.?testJob.hasValue() - ? "CleanupConfirmed" - : "CleanupInProgress" - message: - expression: | - !is_deleting ? "Resource not marked for deletion" - : !resources.?testJob.hasValue() - ? "All resources deleted; cleanup confirmed" - : "Deletion in progress; waiting for resources to be removed" - # Event generation ID metadata field needs to use expression to avoid interpolation issues - observed_generation: - expression: "generationSpec" - observed_time: "{{ now | date \"2006-01-02T15:04:05Z07:00\" }}" - - post_actions: - - name: "reportClusterStatus" - api_call: - method: "PUT" - url: "/clusters/{{ .clusterId }}/statuses" - headers: - - name: "Content-Type" - value: "application/json" - body: "{{ .clusterStatusPayload }}" diff --git a/testdata/adapter-configs/cl-job/adapter-task-resource-job.yaml b/testdata/adapter-configs/cl-job/adapter-task-resource-job.yaml deleted file mode 100644 index bf61887..0000000 --- a/testdata/adapter-configs/cl-job/adapter-task-resource-job.yaml +++ /dev/null @@ -1,29 +0,0 @@ -# Simple hello-world Kubernetes Job for e2e testing -apiVersion: batch/v1 -kind: Job -metadata: - name: "test-job-{{ .clusterId }}-{{ .generationSpec }}" - namespace: "{{ .clusterId }}" - labels: - hyperfleet.io/cluster-id: "{{ .clusterId }}" - hyperfleet.io/resource-type: "job" - app: test-job - annotations: - hyperfleet.io/generation: "{{ .generationSpec }}" -spec: - backoffLimit: 0 - template: - spec: - restartPolicy: Never - containers: - - name: hello-world - image: alpine:3.19 - 
imagePullPolicy: IfNotPresent - command: ["/bin/sh", "-c", "echo 'Hello, World!'"] - resources: - requests: - memory: "32Mi" - cpu: "50m" - limits: - memory: "64Mi" - cpu: "100m" diff --git a/testdata/adapter-configs/cl-job/values.yaml b/testdata/adapter-configs/cl-job/values.yaml deleted file mode 100644 index 3c84dfd..0000000 --- a/testdata/adapter-configs/cl-job/values.yaml +++ /dev/null @@ -1,32 +0,0 @@ -adapterConfig: - create: true - files: - adapter-config.yaml: cl-job/adapter-config.yaml - log: - level: debug - -adapterTaskConfig: - create: true - files: - task-config.yaml: cl-job/adapter-task-config.yaml - job.yaml: cl-job/adapter-task-resource-job.yaml - -broker: - type: googlepubsub - create: true - googlepubsub: - projectId: CHANGE_ME - subscriptionId: CHANGE_ME - topic: CHANGE_ME - deadLetterTopic: CHANGE_ME - -image: - registry: CHANGE_ME - repository: ci/hyperfleet-adapter - pullPolicy: Always - tag: latest - -rbac: - resources: - - namespaces - - jobs diff --git a/testdata/adapter-configs/cl-maestro/adapter-config.yaml b/testdata/adapter-configs/cl-maestro/adapter-config.yaml deleted file mode 100644 index dbe3a4e..0000000 --- a/testdata/adapter-configs/cl-maestro/adapter-config.yaml +++ /dev/null @@ -1,63 +0,0 @@ -# Example HyperFleet Adapter deployment configuration -adapter: - name: cl-maestro - #version: "0.1.0" - -# Log the full merged configuration after load (default: false) -debug_config: true -log: - level: debug - -clients: - hyperfleet_api: - base_url: CHANGE_ME - version: v1 - timeout: 2s - retry_attempts: 3 - retry_backoff: exponential - - broker: - # These values are overridden at deploy time via env vars from Helm values - subscription_id: CHANGE_ME - topic: CHANGE_ME - - maestro: - grpc_server_address: "maestro-grpc.maestro.svc.cluster.local:8090" - - # HTTPS server address for REST API operations (optional) - # Environment variable: HYPERFLEET_MAESTRO_HTTP_SERVER_ADDRESS - http_server_address: 
"http://maestro.maestro.svc.cluster.local:8000" - - # Source identifier for CloudEvents routing (must be unique across adapters) - # Environment variable: HYPERFLEET_MAESTRO_SOURCE_ID - source_id: "cl-maestro" - - # Client identifier (defaults to source_id if not specified) - # Environment variable: HYPERFLEET_MAESTRO_CLIENT_ID - client_id: "cl-maestro-client" - insecure: true - - # Authentication configuration - #auth: - # type: "tls" # TLS certificate-based mTLS - # - # tls_config: - # # gRPC TLS configuration - # # Certificate paths (mounted from Kubernetes secrets) - # # Environment variable: HYPERFLEET_MAESTRO_CA_FILE - # ca_file: "/etc/maestro/certs/grpc/ca.crt" - # - # # Environment variable: HYPERFLEET_MAESTRO_CERT_FILE - # cert_file: "/etc/maestro/certs/grpc/client.crt" - # - # # Environment variable: HYPERFLEET_MAESTRO_KEY_FILE - # key_file: "/etc/maestro/certs/grpc/client.key" - # - # # Server name for TLS verification - # # Environment variable: HYPERFLEET_MAESTRO_SERVER_NAME - # server_name: "maestro-grpc.maestro.svc.cluster.local" - # - # # HTTP API TLS configuration (may use different CA than gRPC) - # # If not set, falls back to ca_file for backwards compatibility - # # Environment variable: HYPERFLEET_MAESTRO_HTTP_CA_FILE - # http_ca_file: "/etc/maestro/certs/https/ca.crt" diff --git a/testdata/adapter-configs/cl-maestro/adapter-task-config.yaml b/testdata/adapter-configs/cl-maestro/adapter-task-config.yaml deleted file mode 100644 index 8978adf..0000000 --- a/testdata/adapter-configs/cl-maestro/adapter-task-config.yaml +++ /dev/null @@ -1,247 +0,0 @@ -# Example HyperFleet Adapter task configuration - -# Parameters with all required variables -params: - - - name: "clusterId" - source: "event.id" - type: "string" - required: true - - - name: "generation" - source: "event.generation" - type: "int" - required: true - - - name: "namespace" - source: "env.NAMESPACE" - type: "string" - -# Preconditions with valid operators and CEL expressions 
-preconditions: - - name: "clusterStatus" - api_call: - method: "GET" - url: "/clusters/{{ .clusterId }}" - timeout: 10s - retry_attempts: 3 - retry_backoff: "exponential" - capture: - - name: "clusterName" - field: "name" - - name: "generation" - field: "generation" - - name: "timestamp" - field: "created_time" - - name: "is_deleting" - expression: "has(clusterStatus.deleted_time)" - - name: "reconciledConditionStatus" - expression: | - status.conditions.filter(c, c.type == "Reconciled").size() > 0 - ? status.conditions.filter(c, c.type == "Reconciled")[0].status - : "False" - - name: "placementClusterName" - expression: "\"cluster1\"" # TBC coming from placement adapter - - name: "platformType" - expression: | - has(spec.platform) && has(spec.platform.type) ? spec.platform.type : "" - - name: "subnets" - expression: | - has(spec.platform) && has(spec.platform.gcp) && has(spec.platform.gcp.subnets) - ? spec.platform.gcp.subnets - : [] - - - - name: "validationCheck" - expression: | - is_deleting || reconciledConditionStatus == "False" - -# Resources with valid K8s manifests -resources: - - name: "resource0" - transport: - client: "maestro" - maestro: - target_cluster: "{{ .placementClusterName }}" - - # ManifestWork manifest ref with Go template structural syntax ({{ if }}, {{ else }}) - # This validates that Go template conditionals work with Maestro transport - manifest: - ref: "/etc/adapter/manifestwork.yaml" - # Discover the ResourceBundle (ManifestWork) by name from Maestro - discovery: - by_name: "{{ .clusterId }}-{{ .adapter.name }}" - lifecycle: - delete: - propagationPolicy: Background - when: - expression: "is_deleting" - - # Discover nested resources deployed by the ManifestWork - nested_discoveries: - - name: "namespace0" - discovery: - by_name: "{{ .clusterId | lower }}-{{ .adapter.name }}-namespace" - - name: "configmap0" - discovery: - by_name: "{{ .clusterId | lower }}-{{ .adapter.name }}-configmap" - -post: - payloads: - - name: "statusPayload" - 
build: - adapter: "{{ .adapter.name }}" - conditions: - # Applied: Check if ManifestWork exists and has type="Applied", status="True" - - type: "Applied" - status: - expression: | - has(resources.resource0) && has(resources.resource0.status) && has(resources.resource0.status.conditions) && resources.resource0.status.conditions.filter(c, has(c.type) && c.type == "Applied").size() > 0 ? resources.resource0.status.conditions.filter(c, c.type == "Applied")[0].status : "False" - reason: - expression: | - has(resources.resource0) && has(resources.resource0.status) && has(resources.resource0.status.conditions) && resources.resource0.status.conditions.filter(c, has(c.type) && c.type == "Applied").size() > 0 ? resources.resource0.status.conditions.filter(c, c.type == "Applied")[0].reason : "ManifestWorkNotDiscovered" - message: - expression: | - has(resources.resource0) && has(resources.resource0.status) && has(resources.resource0.status.conditions) && resources.resource0.status.conditions.filter(c, has(c.type) && c.type == "Applied").size() > 0 ? resources.resource0.status.conditions.filter(c, c.type == "Applied")[0].message : "ManifestWork not discovered from Maestro or no Applied condition" - - # Available: Check if nested discovered manifests are available on the spoke cluster - # Each nested discovery is enriched with top-level "conditions" from status.resourceStatus.manifests[] - - type: "Available" - status: - expression: | - has(resources.namespace0) && has(resources.namespace0.conditions) - && resources.namespace0.conditions.exists(c, has(c.type) && c.type == "Available" && has(c.status) && c.status == "True") - && has(resources.configmap0) && has(resources.configmap0.conditions) - && resources.configmap0.conditions.exists(c, c.type == "Available" && has(c.status) && c.status == "True") - ? "True" - : "False" - reason: - expression: | - !(has(resources.namespace0) && has(resources.namespace0.conditions)) - ? 
"NamespaceNotDiscovered" - : !resources.namespace0.conditions.exists(c, has(c.type) && c.type == "Available" && has(c.status) && c.status == "True") - ? "NamespaceNotAvailable" - : !(has(resources.configmap0) && has(resources.configmap0.conditions)) - ? "ConfigMapNotDiscovered" - : !resources.configmap0.conditions.exists(c, c.type == "Available" && has(c.status) && c.status == "True") - ? "ConfigMapNotAvailable" - : "AllResourcesAvailable" - message: - expression: | - !(has(resources.namespace0) && has(resources.namespace0.conditions)) - ? "Namespace not discovered from ManifestWork" - : !resources.namespace0.conditions.exists(c, has(c.type) && c.type == "Available" && has(c.status) && c.status == "True") - ? "Namespace not yet available on spoke cluster" - : !(has(resources.configmap0) && has(resources.configmap0.conditions)) - ? "ConfigMap not discovered from ManifestWork" - : !resources.configmap0.conditions.exists(c, c.type == "Available" && has(c.status) && c.status == "True") - ? "ConfigMap not yet available on spoke cluster" - : "All manifests (namespace, configmap) are available on spoke cluster" - - # Health: Adapter execution status — surfaces errors from any phase - - type: "Health" - status: - expression: | - adapter.?executionStatus.orValue("") == "success" - && !adapter.?resourcesSkipped.orValue(false) - ? "True" - : "False" - reason: - expression: | - adapter.?executionStatus.orValue("") != "success" - ? "ExecutionFailed:" + adapter.?executionError.?phase.orValue("unknown") - : adapter.?resourcesSkipped.orValue(false) - ? "ResourcesSkipped" - : "Healthy" - message: - expression: | - adapter.?executionStatus.orValue("") != "success" - ? "Adapter failed at phase [" - + adapter.?executionError.?phase.orValue("unknown") - + "] step [" - + adapter.?executionError.?step.orValue("unknown") - + "]: " - + adapter.?executionError.?message.orValue(adapter.?errorMessage.orValue("no details")) - : adapter.?resourcesSkipped.orValue(false) - ? 
"Resources skipped: " + adapter.?skipReason.orValue("unknown reason") - : "Adapter execution completed successfully" - - # Finalized: True once ManifestWork and all spoke resources are confirmed deleted - - type: "Finalized" - status: - expression: | - is_deleting - && adapter.?executionStatus.orValue("") == "success" - && !adapter.?resourcesSkipped.orValue(false) - && !resources.?resource0.hasValue() - && !resources.?namespace0.hasValue() - && !resources.?configmap0.hasValue() - ? "True" - : "False" - reason: - expression: | - !is_deleting ? "NotDeleting" - : !resources.?resource0.hasValue() - && !resources.?namespace0.hasValue() - && !resources.?configmap0.hasValue() - ? "CleanupConfirmed" - : "CleanupInProgress" - message: - expression: | - !is_deleting ? "Resource not marked for deletion" - : !resources.?resource0.hasValue() - && !resources.?namespace0.hasValue() - && !resources.?configmap0.hasValue() - ? "All resources deleted; cleanup confirmed" - : "Deletion in progress; waiting for resources to be removed" - - observed_generation: - expression: "generation" - observed_time: "{{ now | date \"2006-01-02T15:04:05Z07:00\" }}" - - # Extract data from discovered ManifestWork from Maestro - data: - manifestwork: - name: - expression: | - has(resources.resource0) && has(resources.resource0.metadata) - ? resources.resource0.metadata.name - : "" - consumer: - expression: | - has(resources.resource0) && has(resources.resource0.metadata) - ? resources.resource0.metadata.namespace - : placementClusterName - configmap: - name: - expression: | - has(resources.configmap0) && has(resources.configmap0.metadata) - ? resources.configmap0.metadata.name - : "" - clusterId: - expression: | - has(resources.configmap0) && has(resources.configmap0.data) && has(resources.configmap0.data.cluster_id) - ? resources.configmap0.data.cluster_id - : clusterId - namespace: - name: - expression: | - has(resources.namespace0) && has(resources.namespace0.metadata) - ? 
resources.namespace0.metadata.name - : "" - phase: - expression: | - has(resources.namespace0) && has(resources.namespace0.statusFeedback) && has(resources.namespace0.statusFeedback.values) - && resources.namespace0.statusFeedback.values.exists(v, has(v.name) && v.name == "phase" && has(v.fieldValue)) - ? resources.namespace0.statusFeedback.values.filter(v, v.name == "phase")[0].fieldValue.string - : "Unknown" - - post_actions: - - name: "reportClusterStatus" - api_call: - method: "PUT" - url: "/clusters/{{ .clusterId }}/statuses" - headers: - - name: "Content-Type" - value: "application/json" - body: "{{ .statusPayload }}" diff --git a/testdata/adapter-configs/cl-maestro/adapter-task-resource-manifestwork.yaml b/testdata/adapter-configs/cl-maestro/adapter-task-resource-manifestwork.yaml deleted file mode 100644 index 87dd7a8..0000000 --- a/testdata/adapter-configs/cl-maestro/adapter-task-resource-manifestwork.yaml +++ /dev/null @@ -1,144 +0,0 @@ -# Go template — rendered by the adapter engine before YAML parsing -apiVersion: work.open-cluster-management.io/v1 -kind: ManifestWork -metadata: - # ManifestWork name - must be unique within consumer namespace - name: "{{ .clusterId }}-{{ .adapter.name }}" - - # Labels for identification, filtering, and management - labels: - # HyperFleet tracking labels - hyperfleet.io/cluster-id: "{{ .clusterId }}" - hyperfleet.io/adapter: "{{ .adapter.name }}" - hyperfleet.io/component: "infrastructure" - hyperfleet.io/generation: "{{ .generation }}" - hyperfleet.io/resource-group: "cluster-setup" - - # Maestro-specific labels - maestro.io/source-id: "{{ .adapter.name }}" - maestro.io/resource-type: "manifestwork" - maestro.io/priority: "normal" - - # Standard Kubernetes application labels - app.kubernetes.io/name: "aro-hcp-cluster" - app.kubernetes.io/instance: "{{ .clusterId }}" - app.kubernetes.io/version: "v1.0.0" - app.kubernetes.io/component: "infrastructure" - app.kubernetes.io/part-of: "hyperfleet" - 
app.kubernetes.io/managed-by: "cl-maestro" - app.kubernetes.io/created-by: "{{ .adapter.name }}" -{{ if .platformType }} - hyperfleet.io/platform-type: "{{ .platformType }}" -{{ end }} - - # Annotations for metadata and operational information - annotations: - # Tracking and lifecycle - hyperfleet.io/created-by: "cl-maestro-framework" - hyperfleet.io/managed-by: "{{ .adapter.name }}" - hyperfleet.io/generation: "{{ .generation }}" - hyperfleet.io/cluster-id: "{{ .clusterId }}" - hyperfleet.io/cluster-name: "{{ .clusterName }}" - hyperfleet.io/deployment-time: "{{ .timestamp }}" - - # Maestro-specific annotations - maestro.io/applied-time: "{{ .timestamp }}" - maestro.io/source-adapter: "{{ .adapter.name }}" - - # Documentation - description: "Complete cluster setup including namespace, configuration, and RBAC" - -# ManifestWork specification -spec: - # ============================================================================ - # Workload - Contains the Kubernetes manifests to deploy - # ============================================================================ - workload: - # Kubernetes manifests array - injected by framework from business logic config - manifests: - - apiVersion: v1 - kind: Namespace - metadata: - name: "{{ .clusterId | lower }}-{{ .adapter.name }}-namespace" - labels: - app.kubernetes.io/component: adapter-task-config - app.kubernetes.io/instance: "{{ .adapter.name }}" - app.kubernetes.io/name: cl-maestro - app.kubernetes.io/transport: maestro - annotations: - hyperfleet.io/generation: "{{ .generation }}" - - apiVersion: v1 - kind: ConfigMap - data: - cluster_id: "{{ .clusterId }}" - cluster_name: "{{ .clusterName }}" -{{ if eq .platformType "gcp" }} - platform_tier: "cloud" -{{ else }} - platform_tier: "onprem" -{{ end }} -{{ range $i, $subnet := .subnets }} - subnet_{{ $subnet.id }}_name: "{{ $subnet.name }}" - subnet_{{ $subnet.id }}_cidr: "{{ $subnet.cidr }}" - subnet_{{ $subnet.id }}_role: "{{ $subnet.role }}" -{{ end }} - metadata: - 
name: "{{ .clusterId | lower }}-{{ .adapter.name }}-configmap" - namespace: "{{ .clusterId | lower }}-{{ .adapter.name }}-namespace" - labels: - app.kubernetes.io/component: adapter-task-config - app.kubernetes.io/instance: "{{ .adapter.name }}" - app.kubernetes.io/name: cl-maestro - app.kubernetes.io/version: 1.0.0 - app.kubernetes.io/transport: maestro - annotations: - hyperfleet.io/generation: "{{ .generation }}" - - # ============================================================================ - # Delete Options - How resources should be removed - # ============================================================================ - deleteOption: - # Propagation policy for resource deletion - # - "Foreground": Wait for dependent resources to be deleted first - # - "Background": Delete immediately, let cluster handle dependents - # - "Orphan": Leave resources on cluster when ManifestWork is deleted - propagationPolicy: "Foreground" - - # Grace period for graceful deletion (seconds) - gracePeriodSeconds: 30 - - # ============================================================================ - # Manifest Configurations - Per-resource settings for update and feedback - # ============================================================================ - manifestConfigs: - - resourceIdentifier: - group: "" # Core API group (empty for v1 resources) - resource: "namespaces" # Resource type - name: "{{ .clusterId | lower }}-{{ .adapter.name }}-namespace" # Specific resource name - updateStrategy: - type: "ServerSideApply" # Use server-side apply for namespaces - feedbackRules: - - type: "JSONPaths" # Use JSON path expressions for status feedback - jsonPaths: - - name: "phase" - path: ".status.phase" - # ======================================================================== - # Configuration for ConfigMap resources - # ======================================================================== - - resourceIdentifier: - group: "" # Core API group (empty for v1 resources) - resource: 
"configmaps" # Resource type - name: "{{ .clusterId | lower }}-{{ .adapter.name }}-configmap" # Specific resource name - namespace: "{{ .clusterId | lower }}-{{ .adapter.name }}-namespace" - updateStrategy: - type: "ServerSideApply" # Use server-side apply for namespaces - serverSideApply: - fieldManager: "cl-maestro" # Field manager name for conflict resolution - force: false # Don't force conflicts (fail on conflicts) - feedbackRules: - - type: "JSONPaths" # Use JSON path expressions for status feedback - jsonPaths: - - name: "data" - path: ".data" - - name: "resourceVersion" - path: ".metadata.resourceVersion" diff --git a/testdata/adapter-configs/cl-maestro/values.yaml b/testdata/adapter-configs/cl-maestro/values.yaml deleted file mode 100644 index 246a6aa..0000000 --- a/testdata/adapter-configs/cl-maestro/values.yaml +++ /dev/null @@ -1,33 +0,0 @@ -adapterConfig: - create: true - files: - adapter-config.yaml: cl-maestro/adapter-config.yaml - log: - level: debug - -adapterTaskConfig: - create: true - files: - task-config.yaml: cl-maestro/adapter-task-config.yaml - manifestwork.yaml: cl-maestro/adapter-task-resource-manifestwork.yaml - -broker: - type: googlepubsub - create: true - googlepubsub: - projectId: CHANGE_ME - subscriptionId: CHANGE_ME - topic: CHANGE_ME - deadLetterTopic: CHANGE_ME - -image: - registry: CHANGE_ME - repository: ci/hyperfleet-adapter - pullPolicy: Always - tag: latest - -rbac: - resources: - - namespaces - - configmaps - - configmaps/status diff --git a/testdata/adapter-configs/cl-namespace/adapter-config.yaml b/testdata/adapter-configs/cl-namespace/adapter-config.yaml deleted file mode 100644 index 6578f95..0000000 --- a/testdata/adapter-configs/cl-namespace/adapter-config.yaml +++ /dev/null @@ -1,23 +0,0 @@ -adapter: - name: cl-namespace - #version: "0.1.0" - -# Log the full merged configuration after load (default: false) -debug_config: false -log: - level: debug - -clients: - hyperfleet_api: - base_url: CHANGE_ME - version: v1 - 
timeout: 2s - retry_attempts: 3 - retry_backoff: exponential - - broker: - subscription_id: CHANGE_ME - topic: CHANGE_ME - - kubernetes: - api_version: "v1" diff --git a/testdata/adapter-configs/cl-namespace/adapter-task-config.yaml b/testdata/adapter-configs/cl-namespace/adapter-task-config.yaml deleted file mode 100644 index fc519bd..0000000 --- a/testdata/adapter-configs/cl-namespace/adapter-task-config.yaml +++ /dev/null @@ -1,189 +0,0 @@ -# Simple valid HyperFleet Adapter Configuration for testing - -# Parameters with all required variables -params: - - name: "clusterId" - source: "event.id" - type: "string" - required: true - - name: "testRunId" - source: "env.TEST_RUN_ID" - type: "string" - required: false - default: "TEST_RUN_ID" - - name: "ci" - source: "env.CI" - type: "string" - required: false - default: "false" - -# Preconditions with valid operators and CEL expressions -preconditions: - - name: "clusterStatus" - api_call: - method: "GET" - url: "/clusters/{{ .clusterId }}" - timeout: 10s - retry_attempts: 3 - retry_backoff: "exponential" - capture: - - name: "clusterName" - field: "name" - - name: "generationSpec" - field: "generation" - - name: "is_deleting" - expression: "has(clusterStatus.deleted_time)" - - name: "clusterNotReconciled" - expression: | - status.conditions.filter(c, c.type == "Reconciled").size() > 0 - ? status.conditions.filter(c, c.type == "Reconciled")[0].status != "True" - : true - - name: "clusterReconciledTTL" - expression: | - (timestamp(now()) - timestamp( - status.conditions.filter(c, c.type == "Reconciled").size() > 0 - ? 
status.conditions.filter(c, c.type == "Reconciled")[0].last_transition_time - : now() - )).getSeconds() > 300 - - - name: "validationCheck" - # Precondition passes if cluster is NOT Reconciled OR if cluster is Reconciled and stable for >300 seconds since last transition (enables self-healing) - expression: | - is_deleting || clusterNotReconciled || clusterReconciledTTL - -# Resources with valid K8s manifests -resources: - - name: "clusterNamespace" - transport: - client: "kubernetes" - manifest: - apiVersion: v1 - kind: Namespace - metadata: - name: "{{ .clusterId }}" - labels: - hyperfleet.io/cluster-id: "{{ .clusterId }}" - hyperfleet.io/cluster-name: "{{ .clusterName }}" - e2e.hyperfleet.io/test-run-id: "{{ .testRunId }}" - e2e.hyperfleet.io/ci: "{{ .ci }}" - e2e.hyperfleet.io/managed-by: "test-framework" - annotations: - hyperfleet.io/generation: "{{ .generationSpec }}" - discovery: - namespace: "*" # Cluster-scoped resource (Namespace) - by_selectors: - label_selector: - hyperfleet.io/cluster-id: "{{ .clusterId }}" - hyperfleet.io/cluster-name: "{{ .clusterName }}" - lifecycle: - delete: - propagationPolicy: Foreground - when: - expression: "is_deleting" - -# Post-processing with valid CEL expressions -# This example contains multiple resources, we will only report on the conditions of the jobNamespace not to overcomplicate the example -post: - payloads: - - name: "clusterStatusPayload" - build: - adapter: "{{ .adapter.name }}" - conditions: - # Applied: Job successfully created - - type: "Applied" - status: - expression: | - resources.?clusterNamespace.?status.?phase.orValue("") == "Active" ? "True" : "False" - reason: - expression: | - resources.?clusterNamespace.?status.?phase.orValue("") == "Active" - ? "NamespaceCreated" - : "NamespacePending" - message: - expression: | - resources.?clusterNamespace.?status.?phase.orValue("") == "Active" - ? 
"Namespace created successfully" - : "Namespace creation in progress" - # Available: Check job status conditions - - type: "Available" - status: - expression: | - resources.?clusterNamespace.?status.?phase.orValue("") == "Active" ? "True" : "False" - reason: - expression: | - resources.?clusterNamespace.?status.?phase.orValue("") == "Active" ? "NamespaceReady" : "NamespaceNotReady" - message: - expression: | - resources.?clusterNamespace.?status.?phase.orValue("") == "Active" ? "Namespace is active and ready" : "Namespace is not active and ready" - # Health: Adapter execution status (runtime) - - type: "Health" - status: - expression: | - adapter.?executionStatus.orValue("") == "success" - && !adapter.?resourcesSkipped.orValue(false) - ? "True" - : "False" - reason: - expression: | - adapter.?executionStatus.orValue("") != "success" - ? "ExecutionFailed:" + adapter.?executionError.?phase.orValue("unknown") - : adapter.?resourcesSkipped.orValue(false) - ? "ResourcesSkipped" - : "Healthy" - message: - expression: | - adapter.?executionStatus.orValue("") != "success" - ? "Adapter failed at phase [" - + adapter.?executionError.?phase.orValue("unknown") - + "] step [" - + adapter.?executionError.?step.orValue("unknown") - + "]: " - + adapter.?executionError.?message.orValue(adapter.?errorMessage.orValue("no details")) - : adapter.?resourcesSkipped.orValue(false) - ? "Resources skipped: " + adapter.?skipReason.orValue("unknown reason") - : "Adapter execution completed successfully" - # Finalized: True once namespace is confirmed deleted - - type: "Finalized" - status: - expression: | - is_deleting - && adapter.?executionStatus.orValue("") == "success" - && !adapter.?resourcesSkipped.orValue(false) - && !resources.?clusterNamespace.hasValue() - ? "True" - : "False" - reason: - expression: | - !is_deleting ? "NotDeleting" - : !resources.?clusterNamespace.hasValue() - ? "CleanupConfirmed" - : "CleanupInProgress" - message: - expression: | - !is_deleting ? 
"Resource not marked for deletion" - : !resources.?clusterNamespace.hasValue() - ? "All resources deleted; cleanup confirmed" - : "Deletion in progress; waiting for namespace to be removed" - # Event generation ID metadata field needs to use expression to avoid interpolation issues - observed_generation: - expression: "generationSpec" - observed_time: "{{ now | date \"2006-01-02T15:04:05Z07:00\" }}" - - data: - namespace: - name: - expression: | - resources.?clusterNamespace.?metadata.?name.orValue("") - status: - expression: | - resources.?clusterNamespace.?status.?phase.orValue("") - - post_actions: - - name: "reportClusterStatus" - api_call: - method: "PUT" - url: "/clusters/{{ .clusterId }}/statuses" - headers: - - name: "Content-Type" - value: "application/json" - body: "{{ .clusterStatusPayload }}" diff --git a/testdata/adapter-configs/cl-namespace/values.yaml b/testdata/adapter-configs/cl-namespace/values.yaml deleted file mode 100644 index a82320a..0000000 --- a/testdata/adapter-configs/cl-namespace/values.yaml +++ /dev/null @@ -1,38 +0,0 @@ -adapterConfig: - create: true - files: - adapter-config.yaml: cl-namespace/adapter-config.yaml - log: - level: debug - -adapterTaskConfig: - create: true - files: - task-config.yaml: cl-namespace/adapter-task-config.yaml - -broker: - type: googlepubsub - create: true - googlepubsub: - projectId: CHANGE_ME - subscriptionId: CHANGE_ME - topic: CHANGE_ME - deadLetterTopic: CHANGE_ME - -image: - registry: CHANGE_ME - repository: ci/hyperfleet-adapter - pullPolicy: Always - tag: latest - -rbac: - resources: - - namespaces - - serviceaccounts - - configmaps - - deployments - - roles - - rolebindings - - jobs - - jobs/status - - pods diff --git a/testdata/adapter-configs/np-configmap/adapter-config.yaml b/testdata/adapter-configs/np-configmap/adapter-config.yaml deleted file mode 100644 index dd0c593..0000000 --- a/testdata/adapter-configs/np-configmap/adapter-config.yaml +++ /dev/null @@ -1,23 +0,0 @@ -adapter: - name: 
np-configmap - #version: "0.1.0" - -# Log the full merged configuration after load (default: false) -debug_config: false -log: - level: debug - -clients: - hyperfleet_api: - base_url: CHANGE_ME - version: v1 - timeout: 2s - retry_attempts: 3 - retry_backoff: exponential - - broker: - subscription_id: CHANGE_ME - topic: CHANGE_ME - - kubernetes: - api_version: "v1" diff --git a/testdata/adapter-configs/np-configmap/adapter-task-config.yaml b/testdata/adapter-configs/np-configmap/adapter-task-config.yaml deleted file mode 100644 index 0e56379..0000000 --- a/testdata/adapter-configs/np-configmap/adapter-task-config.yaml +++ /dev/null @@ -1,178 +0,0 @@ -# Simple valid HyperFleet Adapter Configuration for testing - -# Parameters with all required variables -params: - - name: "clusterId" - source: "event.owner_references.id" - type: "string" - required: true - - name: "nodepoolId" - source: "event.id" - type: "string" - required: true - -# Preconditions with valid operators and CEL expressions -preconditions: - - name: "nodepoolStatus" - api_call: - method: "GET" - url: "/clusters/{{ .clusterId }}/nodepools/{{ .nodepoolId }}" - timeout: 10s - retry_attempts: 3 - retry_backoff: "exponential" - capture: - - name: "nodepoolName" - field: "name" - - name: "generationSpec" - field: "generation" - - name: "is_deleting" - expression: "has(nodepoolStatus.deleted_time)" - - name: "nodepoolNotReconciled" - expression: | - status.conditions.filter(c, c.type == "Reconciled").size() > 0 - ? status.conditions.filter(c, c.type == "Reconciled")[0].status != "True" - : true - - name: "nodepoolReconciledTTL" - expression: | - (timestamp(now()) - timestamp( - status.conditions.filter(c, c.type == "Reconciled").size() > 0 - ? 
status.conditions.filter(c, c.type == "Reconciled")[0].last_transition_time - : now() - )).getSeconds() > 300 - - - name: "clusterAdapterStatus" - api_call: - method: "GET" - url: "/clusters/{{ .clusterId }}/statuses" - timeout: 10s - retry_attempts: 3 - retry_backoff: "exponential" - capture: - - name: "clusterNamespaceStatus" - field: "{.items[?(@.adapter=='cl-namespace')].data.namespace.status}" - - - name: "validationCheck" - # Precondition passes if deleting OR (namespace active AND (nodepool not reconciled OR reconciled TTL expired)) - expression: | - is_deleting || (clusterNamespaceStatus == "Active" && (nodepoolNotReconciled || nodepoolReconciledTTL)) - -# Resources with valid K8s manifests -resources: - - name: "nodepoolConfigMap" - transport: - client: "kubernetes" - manifest: - ref: "/etc/adapter/configmap.yaml" - discovery: - namespace: "{{ .clusterId }}" - by_selectors: - label_selector: - hyperfleet.io/resource-type: "configmap" - hyperfleet.io/cluster-id: "{{ .clusterId }}" - hyperfleet.io/nodepool-id: "{{ .nodepoolId }}" - lifecycle: - delete: - propagationPolicy: Background - when: - expression: "is_deleting" - -# Post-processing with valid CEL expressions -post: - payloads: - - name: "nodepoolStatusPayload" - build: - adapter: "{{ .adapter.name }}" - conditions: - # Applied: ConfigMap successfully created - - type: "Applied" - status: - expression: | - has(resources.nodepoolConfigMap) ? "True" : "False" - reason: - expression: | - has(resources.nodepoolConfigMap) - ? "ConfigMapApplied" - : "ConfigMapPending" - message: - expression: | - has(resources.nodepoolConfigMap) - ? "nodepoolConfigMap manifest applied successfully" - : "nodepoolConfigMap is pending to be applied" - # Available: Check configmap exists - - type: "Available" - status: - expression: | - has(resources.nodepoolConfigMap) && has(resources.nodepoolConfigMap.data) ? 
"True" : "False" - reason: - expression: | - has(resources.nodepoolConfigMap) && has(resources.nodepoolConfigMap.data) - ? "ConfigMapReady" - : "ConfigMapNotReady" - message: - expression: | - has(resources.nodepoolConfigMap) && has(resources.nodepoolConfigMap.data) - ? "ConfigMap is available and contains data" - : "ConfigMap is not yet available" - # Health: Adapter execution status (runtime) - - type: "Health" - status: - expression: | - adapter.?executionStatus.orValue("") == "success" - && !adapter.?resourcesSkipped.orValue(false) - ? "True" - : "False" - reason: - expression: | - adapter.?executionStatus.orValue("") != "success" - ? "ExecutionFailed:" + adapter.?executionError.?phase.orValue("unknown") - : adapter.?resourcesSkipped.orValue(false) - ? "ResourcesSkipped" - : "Healthy" - message: - expression: | - adapter.?executionStatus.orValue("") != "success" - ? "Adapter failed at phase [" - + adapter.?executionError.?phase.orValue("unknown") - + "] step [" - + adapter.?executionError.?step.orValue("unknown") - + "]: " - + adapter.?executionError.?message.orValue(adapter.?errorMessage.orValue("no details")) - : adapter.?resourcesSkipped.orValue(false) - ? "Resources skipped: " + adapter.?skipReason.orValue("unknown reason") - : "Adapter execution completed successfully" - # Finalized: True once configmap is confirmed deleted during deletion - - type: "Finalized" - status: - expression: | - is_deleting - && adapter.?executionStatus.orValue("") == "success" - && !adapter.?resourcesSkipped.orValue(false) - && !resources.?nodepoolConfigMap.hasValue() - ? "True" - : "False" - reason: - expression: | - !is_deleting ? "NotDeleting" - : !resources.?nodepoolConfigMap.hasValue() - ? "CleanupConfirmed" - : "CleanupInProgress" - message: - expression: | - !is_deleting ? "Resource not marked for deletion" - : !resources.?nodepoolConfigMap.hasValue() - ? 
"All resources deleted; cleanup confirmed" - : "Deletion in progress; waiting for configmap to be removed" - # Event generation ID metadata field needs to use expression to avoid interpolation issues - observed_generation: - expression: "generationSpec" - observed_time: "{{ now | date \"2006-01-02T15:04:05Z07:00\" }}" - - post_actions: - - name: "reportNodepoolStatus" - api_call: - method: "PUT" - url: "/clusters/{{ .clusterId }}/nodepools/{{ .nodepoolId }}/statuses" - headers: - - name: "Content-Type" - value: "application/json" - body: "{{ .nodepoolStatusPayload }}" diff --git a/testdata/adapter-configs/np-configmap/adapter-task-resource-configmap.yaml b/testdata/adapter-configs/np-configmap/adapter-task-resource-configmap.yaml deleted file mode 100644 index 750344b..0000000 --- a/testdata/adapter-configs/np-configmap/adapter-task-resource-configmap.yaml +++ /dev/null @@ -1,19 +0,0 @@ -# Test ConfigMap template for nodepool -apiVersion: v1 -kind: ConfigMap -metadata: - name: "nodepool-config-{{ .nodepoolId }}" - namespace: "{{ .clusterId }}" - labels: - hyperfleet.io/cluster-id: "{{ .clusterId }}" - hyperfleet.io/nodepool-id: "{{ .nodepoolId }}" - hyperfleet.io/nodepool-name: "{{ .nodepoolName }}" - hyperfleet.io/resource-type: "configmap" - annotations: - hyperfleet.io/generation: "{{ .generationSpec }}" -data: - nodepool.id: "{{ .nodepoolId }}" - nodepool.name: "{{ .nodepoolName }}" - cluster.id: "{{ .clusterId }}" - test.key: "test-value" - environment: "e2e-testing" diff --git a/testdata/adapter-configs/np-configmap/values.yaml b/testdata/adapter-configs/np-configmap/values.yaml deleted file mode 100644 index 3e448c8..0000000 --- a/testdata/adapter-configs/np-configmap/values.yaml +++ /dev/null @@ -1,32 +0,0 @@ -adapterConfig: - create: true - files: - adapter-config.yaml: np-configmap/adapter-config.yaml - log: - level: debug - -adapterTaskConfig: - create: true - files: - task-config.yaml: np-configmap/adapter-task-config.yaml - configmap.yaml: 
np-configmap/adapter-task-resource-configmap.yaml - -broker: - type: googlepubsub - create: true - googlepubsub: - projectId: CHANGE_ME - subscriptionId: CHANGE_ME - topic: CHANGE_ME - deadLetterTopic: CHANGE_ME - -image: - registry: CHANGE_ME - repository: ci/hyperfleet-adapter - pullPolicy: Always - tag: latest - -rbac: - resources: - - configmaps - - configmaps/status