Scale Test Infrastructure #92
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| name: Scale Test Infrastructure | |
| on: | |
| workflow_dispatch: | |
| inputs: | |
| action: | |
| description: 'Action to perform' | |
| required: true | |
| type: choice | |
| options: | |
| - plan | |
| - apply | |
| - destroy | |
| scale_multiplier: | |
| description: 'Resource multiplier (1=175, 10=1740, 50=8700 resources)' | |
| required: true | |
| type: choice | |
| default: '1' | |
| options: | |
| - '1' | |
| - '5' | |
| - '10' | |
| - '25' | |
| - '50' | |
| scenario: | |
| description: 'Test scenario (triggers specific risks in Overmind)' | |
| required: false | |
| type: choice | |
| default: 'none' | |
| options: | |
| - 'none' | |
| # AWS scenarios | |
| - 'lambda_timeout' | |
| - 'shared_sg_open' | |
| - 'vpc_peering_change' | |
| - 'central_sns_change' | |
| - 'combined_network' | |
| - 'combined_all' | |
| - 'combined_max' | |
| # GCP scenarios (requires cloud_provider=gcp or both) | |
| - 'shared_firewall_open' | |
| - 'central_pubsub_change' | |
| - 'gce_downgrade' | |
| - 'function_timeout' | |
| - 'combined_gcp_all' | |
| cloud_provider: | |
| description: 'Cloud provider to deploy' | |
| required: true | |
| type: choice | |
| default: 'aws' | |
| options: | |
| - 'aws' | |
| - 'gcp' | |
| - 'both' | |
| confirmation: | |
| description: 'For destroy: type DESTROY-SCALE-TEST to confirm' | |
| required: false | |
| type: string | |
| env: | |
| TF_VAR_scale_multiplier: ${{ inputs.scale_multiplier }} | |
| TF_VAR_scenario: ${{ inputs.scenario }} | |
| TF_VAR_cloud_provider: ${{ inputs.cloud_provider }} | |
| WORKING_DIR: scale-test | |
| jobs: | |
| validate: | |
| name: Validate Inputs | |
| runs-on: ubuntu-latest | |
| steps: | |
| - name: Validate destroy confirmation | |
| if: ${{ inputs.action == 'destroy' }} | |
| run: | | |
| if [ "${{ inputs.confirmation }}" != "DESTROY-SCALE-TEST" ]; then | |
| echo "::error::Destroy requires confirmation. Type 'DESTROY-SCALE-TEST' in the confirmation field." | |
| exit 1 | |
| fi | |
| echo "Destroy confirmation validated" | |
| terraform: | |
| name: Terraform ${{ inputs.action }} (×${{ inputs.scale_multiplier }}, scenario=${{ inputs.scenario }}) | |
| needs: validate | |
| runs-on: ubuntu-latest | |
| permissions: | |
| contents: read | |
| id-token: write | |
| pull-requests: write | |
| concurrency: | |
| group: scale-test-tfstate | |
| cancel-in-progress: false | |
| defaults: | |
| run: | |
| working-directory: ${{ env.WORKING_DIR }} | |
| steps: | |
| - name: Checkout repository | |
| uses: actions/checkout@v4 | |
| - name: Setup Terraform | |
| uses: hashicorp/setup-terraform@v3 | |
| with: | |
| terraform_wrapper: false | |
| - name: Configure AWS Credentials | |
| uses: aws-actions/configure-aws-credentials@v4 | |
| with: | |
| audience: sts.amazonaws.com | |
| aws-region: us-east-1 | |
| role-to-assume: ${{ vars.TERRAFORM_DEPLOY_ROLE }} | |
| # Always authenticate with GCP - Terraform validates all providers during init | |
| # even when cloud_provider=aws (no GCP resources created) | |
| - name: Configure GCP Credentials | |
| uses: google-github-actions/auth@v2 | |
| with: | |
| credentials_json: ${{ secrets.OVERMIND_SCALE_TEST }} | |
| - name: Setup GCP SDK | |
| uses: google-github-actions/setup-gcloud@v2 | |
| with: | |
| project_id: overmind-scale-test | |
| - name: Terraform Init | |
| id: init | |
| run: | | |
| terraform version | |
| terraform init -input=false | |
| - name: Terraform Plan | |
| id: plan | |
| if: ${{ inputs.action == 'plan' || inputs.action == 'apply' }} | |
| run: | | |
| set -o pipefail | |
| echo "Planning with scale_multiplier=${{ inputs.scale_multiplier }}, scenario=${{ inputs.scenario }}" | |
| terraform plan \ | |
| -compact-warnings \ | |
| -no-color \ | |
| -input=false \ | |
| -lock-timeout=5m \ | |
| -out=tfplan 2>&1 | tee terraform_plan.log | |
| # Generate JSON plan for Overmind | |
| terraform show -json tfplan > tfplan.json | |
| # Output summary | |
| echo "## Terraform Plan Summary" >> $GITHUB_STEP_SUMMARY | |
| echo "- **Scale Multiplier:** ${{ inputs.scale_multiplier }}" >> $GITHUB_STEP_SUMMARY | |
| echo "- **Scenario:** ${{ inputs.scenario }}" >> $GITHUB_STEP_SUMMARY | |
| echo "- **Action:** ${{ inputs.action }}" >> $GITHUB_STEP_SUMMARY | |
| echo "" >> $GITHUB_STEP_SUMMARY | |
| echo "### Resource Changes" >> $GITHUB_STEP_SUMMARY | |
| terraform show -no-color tfplan | grep -E "^(Plan:|No changes)" >> $GITHUB_STEP_SUMMARY || true | |
| - name: Install Overmind CLI | |
| uses: overmindtech/actions/install-cli@main | |
| id: install-cli | |
| with: | |
| version: latest | |
| github-token: ${{ secrets.GITHUB_TOKEN }} | |
| - name: Submit Plan to Overmind | |
| uses: overmindtech/actions/submit-plan@main | |
| continue-on-error: true | |
| id: submit-plan | |
| if: ${{ inputs.action == 'plan' || inputs.action == 'apply' }} | |
| with: | |
| ovm-api-key: ${{ secrets.OVM_API_KEY }} | |
| plan-json: ${{ env.WORKING_DIR }}/tfplan.json | |
| tags: 'model=risks_v6' | |
| # ========================================================================= | |
| # Quality Evaluation Steps | |
| # Capture analysis results and validate expected risks for each scenario | |
| # ========================================================================= | |
| - name: Get change results as JSON | |
| id: get-results | |
| if: ${{ steps.submit-plan.outputs.change-url != '' && (inputs.action == 'plan' || inputs.action == 'apply') }} | |
| continue-on-error: true | |
| env: | |
| OVM_API_KEY: ${{ secrets.OVM_API_KEY }} | |
| run: | | |
| echo "Fetching change results from: ${{ steps.submit-plan.outputs.change-url }}" | |
| # Verify overmind CLI is available | |
| if ! command -v overmind &> /dev/null; then | |
| echo "::error::Overmind CLI not found. Install step may have failed." | |
| exit 1 | |
| fi | |
| # Get the full change analysis as JSON | |
| overmind changes get-change \ | |
| --change "${{ steps.submit-plan.outputs.change-url }}" \ | |
| --format json \ | |
| > change-results.json | |
| # Extract key metrics for assertions | |
| RISK_COUNT=$(jq '.risks | length // 0' change-results.json) | |
| HIGH_RISK_COUNT=$(jq '[.risks[]? | select(.severity == "high" or .severity == "critical")] | length' change-results.json) | |
| MEDIUM_RISK_COUNT=$(jq '[.risks[]? | select(.severity == "medium")] | length' change-results.json) | |
| echo "risk_count=$RISK_COUNT" >> $GITHUB_OUTPUT | |
| echo "high_risk_count=$HIGH_RISK_COUNT" >> $GITHUB_OUTPUT | |
| echo "medium_risk_count=$MEDIUM_RISK_COUNT" >> $GITHUB_OUTPUT | |
| echo "## Change Analysis Results" >> $GITHUB_STEP_SUMMARY | |
| echo "- **Total Risks:** $RISK_COUNT" >> $GITHUB_STEP_SUMMARY | |
| echo "- **High/Critical Risks:** $HIGH_RISK_COUNT" >> $GITHUB_STEP_SUMMARY | |
| echo "- **Medium Risks:** $MEDIUM_RISK_COUNT" >> $GITHUB_STEP_SUMMARY | |
| echo "" >> $GITHUB_STEP_SUMMARY | |
| # List all risks in summary | |
| echo "### Detected Risks" >> $GITHUB_STEP_SUMMARY | |
| if [ "$RISK_COUNT" -gt 0 ]; then | |
| jq -r '.risks[]? | "- **[\(.severity)]** \(.title)"' change-results.json >> $GITHUB_STEP_SUMMARY | |
| else | |
| echo "_No risks detected_" >> $GITHUB_STEP_SUMMARY | |
| fi | |
| - name: Validate scenario results | |
| id: validate-scenario | |
| if: ${{ steps.get-results.outcome == 'success' && inputs.scenario != 'none' }} | |
| run: | | |
| SCENARIO="${{ inputs.scenario }}" | |
| RISK_COUNT="${{ steps.get-results.outputs.risk_count }}" | |
| HIGH_RISK_COUNT="${{ steps.get-results.outputs.high_risk_count }}" | |
| MEDIUM_RISK_COUNT="${{ steps.get-results.outputs.medium_risk_count }}" | |
| echo "" | |
| echo "==========================================" | |
| echo "Validating results for scenario: $SCENARIO" | |
| echo "==========================================" | |
| echo "Total risks found: $RISK_COUNT" | |
| echo "High/Critical risks: $HIGH_RISK_COUNT" | |
| echo "Medium risks: $MEDIUM_RISK_COUNT" | |
| echo "" | |
| VALIDATION_PASSED=true | |
| VALIDATION_MESSAGES="" | |
| # ----------------------------------------------------------------------- | |
| # shared_sg_open: SSH open to 0.0.0.0/0 - expect HIGH/CRITICAL risk | |
| # ----------------------------------------------------------------------- | |
| if [[ "$SCENARIO" == "shared_sg_open" ]]; then | |
| echo "Testing: shared_sg_open - expect high/critical security risk" | |
| if [ "$HIGH_RISK_COUNT" -eq 0 ]; then | |
| echo "❌ FAIL: Expected high-severity risk for SSH-to-internet scenario" | |
| VALIDATION_PASSED=false | |
| VALIDATION_MESSAGES="Expected high-severity SSH risk not found" | |
| else | |
| echo "✅ PASS: Found $HIGH_RISK_COUNT high-severity risk(s) as expected" | |
| fi | |
| fi | |
| # ----------------------------------------------------------------------- | |
| # lambda_timeout: Timeout reduced to 1s - expect MEDIUM risk | |
| # ----------------------------------------------------------------------- | |
| if [[ "$SCENARIO" == "lambda_timeout" ]]; then | |
| echo "Testing: lambda_timeout - expect timeout/reliability risk" | |
| TIMEOUT_RISK=$(jq '[.risks[]? | select(.title | test("timeout|reliability|function"; "i"))] | length' change-results.json) | |
| if [ "$TIMEOUT_RISK" -eq 0 ] && [ "$RISK_COUNT" -eq 0 ]; then | |
| echo "❌ FAIL: Expected timeout-related risk" | |
| VALIDATION_PASSED=false | |
| VALIDATION_MESSAGES="Expected timeout risk not found" | |
| else | |
| echo "✅ PASS: Found risk(s) for timeout scenario" | |
| fi | |
| fi | |
| # ----------------------------------------------------------------------- | |
| # vpc_peering_change: DNS resolution enabled - expect analysis (may or may not be risk) | |
| # ----------------------------------------------------------------------- | |
| if [[ "$SCENARIO" == "vpc_peering_change" ]]; then | |
| echo "Testing: vpc_peering_change - expect network analysis" | |
| if [ "$RISK_COUNT" -eq 0 ]; then | |
| echo "⚠️ INFO: No risks found for VPC peering change (may be expected)" | |
| else | |
| echo "✅ PASS: Found $RISK_COUNT risk(s) for VPC peering change" | |
| fi | |
| # VPC peering is ambiguous, so we don't fail on no risks | |
| fi | |
| # ----------------------------------------------------------------------- | |
| # central_sns_change: SNS topic policy change - expect HIGH risk | |
| # ----------------------------------------------------------------------- | |
| if [[ "$SCENARIO" == "central_sns_change" ]]; then | |
| echo "Testing: central_sns_change - expect SNS policy risk" | |
| if [ "$HIGH_RISK_COUNT" -eq 0 ] && [ "$MEDIUM_RISK_COUNT" -eq 0 ]; then | |
| echo "❌ FAIL: Expected risk for SNS topic policy change" | |
| VALIDATION_PASSED=false | |
| VALIDATION_MESSAGES="Expected SNS policy risk not found" | |
| else | |
| echo "✅ PASS: Found risk(s) for SNS change" | |
| fi | |
| fi | |
| # ----------------------------------------------------------------------- | |
| # combined_network: Multiple network/security changes - expect HIGH/CRITICAL | |
| # ----------------------------------------------------------------------- | |
| if [[ "$SCENARIO" == "combined_network" || "$SCENARIO" == "combined_all" || "$SCENARIO" == "combined_max" ]]; then | |
| echo "Testing: $SCENARIO - expect multiple high-severity risks" | |
| if [ "$HIGH_RISK_COUNT" -eq 0 ]; then | |
| echo "❌ FAIL: Expected high-severity risks for combined scenario" | |
| VALIDATION_PASSED=false | |
| VALIDATION_MESSAGES="Expected high-severity risks for combined scenario not found" | |
| else | |
| echo "✅ PASS: Found $HIGH_RISK_COUNT high-severity risk(s)" | |
| fi | |
| fi | |
| # ----------------------------------------------------------------------- | |
| # GCP: shared_firewall_open - expect HIGH/CRITICAL risk | |
| # ----------------------------------------------------------------------- | |
| if [[ "$SCENARIO" == "shared_firewall_open" ]]; then | |
| echo "Testing: shared_firewall_open - expect high/critical security risk" | |
| if [ "$HIGH_RISK_COUNT" -eq 0 ]; then | |
| echo "❌ FAIL: Expected high-severity risk for firewall-to-internet scenario" | |
| VALIDATION_PASSED=false | |
| VALIDATION_MESSAGES="Expected high-severity firewall risk not found" | |
| else | |
| echo "✅ PASS: Found $HIGH_RISK_COUNT high-severity risk(s)" | |
| fi | |
| fi | |
| # ----------------------------------------------------------------------- | |
| # GCP: function_timeout - expect MEDIUM risk | |
| # ----------------------------------------------------------------------- | |
| if [[ "$SCENARIO" == "function_timeout" ]]; then | |
| echo "Testing: function_timeout - expect timeout risk" | |
| if [ "$RISK_COUNT" -eq 0 ]; then | |
| echo "❌ FAIL: Expected timeout-related risk for Cloud Function" | |
| VALIDATION_PASSED=false | |
| VALIDATION_MESSAGES="Expected Cloud Function timeout risk not found" | |
| else | |
| echo "✅ PASS: Found risk(s) for function timeout scenario" | |
| fi | |
| fi | |
| echo "" | |
| echo "==========================================" | |
| # Write validation results to summary | |
| echo "" >> $GITHUB_STEP_SUMMARY | |
| echo "### Scenario Validation: \`$SCENARIO\`" >> $GITHUB_STEP_SUMMARY | |
| if [ "$VALIDATION_PASSED" = true ]; then | |
| echo "✅ **PASSED** - Expected risks were detected" >> $GITHUB_STEP_SUMMARY | |
| echo "validation_passed=true" >> $GITHUB_OUTPUT | |
| else | |
| echo "❌ **FAILED** - $VALIDATION_MESSAGES" >> $GITHUB_STEP_SUMMARY | |
| echo "validation_passed=false" >> $GITHUB_OUTPUT | |
| echo "::error::Scenario validation failed: $VALIDATION_MESSAGES" | |
| exit 1 | |
| fi | |
| - name: Upload analysis results | |
| uses: actions/upload-artifact@v4 | |
| if: ${{ always() && (inputs.action == 'plan' || inputs.action == 'apply') }} | |
| with: | |
| name: change-analysis-${{ inputs.scenario }}-${{ github.run_id }} | |
| path: | | |
| ${{ env.WORKING_DIR }}/change-results.json | |
| ${{ env.WORKING_DIR }}/tfplan.json | |
| if-no-files-found: ignore | |
| # ========================================================================= | |
| # PromptFoo Quality Evals | |
| # Run LLM-as-judge evaluation on the risk analysis quality | |
| # ========================================================================= | |
| - name: Setup Node.js for PromptFoo | |
| uses: actions/setup-node@v4 | |
| if: ${{ steps.get-results.outcome == 'success' && inputs.scenario != 'none' }} | |
| with: | |
| node-version: '20' | |
| - name: Install PromptFoo dependencies | |
| if: ${{ steps.get-results.outcome == 'success' && inputs.scenario != 'none' }} | |
| working-directory: ${{ env.WORKING_DIR }}/evals | |
| run: npm install | |
| - name: Run PromptFoo quality evals | |
| id: promptfoo-eval | |
| if: ${{ steps.get-results.outcome == 'success' && inputs.scenario != 'none' }} | |
| continue-on-error: true | |
| working-directory: ${{ env.WORKING_DIR }}/evals | |
| env: | |
| OPENAI_API_KEY: ${{ secrets.OPENAI_KEY }} | |
| run: | | |
| echo "Running PromptFoo evals for scenario: ${{ inputs.scenario }}" | |
| # Run evals with the change results file | |
| npx promptfoo eval \ | |
| --vars "results_file=../change-results.json,scenario=${{ inputs.scenario }}" \ | |
| --filter-description "${{ inputs.scenario }}" \ | |
| --output eval-results.json \ | |
| --no-cache | |
| # Extract summary metrics | |
| if [ -f eval-results.json ]; then | |
| PASS_COUNT=$(jq '[.results[].success] | map(select(. == true)) | length' eval-results.json) | |
| TOTAL_COUNT=$(jq '[.results[].success] | length' eval-results.json) | |
| echo "" >> $GITHUB_STEP_SUMMARY | |
| echo "### PromptFoo Quality Eval" >> $GITHUB_STEP_SUMMARY | |
| echo "- **Passed:** $PASS_COUNT / $TOTAL_COUNT assertions" >> $GITHUB_STEP_SUMMARY | |
| # Show any failures | |
| jq -r '.results[] | select(.success == false) | "- ❌ \(.description): \(.error // "assertion failed")"' eval-results.json >> $GITHUB_STEP_SUMMARY || true | |
| fi | |
| - name: Upload PromptFoo results | |
| uses: actions/upload-artifact@v4 | |
| if: ${{ steps.promptfoo-eval.outcome != 'skipped' }} | |
| with: | |
| name: promptfoo-results-${{ inputs.scenario }}-${{ github.run_id }} | |
| path: ${{ env.WORKING_DIR }}/evals/eval-results.json | |
| if-no-files-found: ignore | |
| # Cost Analysis disabled for scale testing (plan too large) | |
| # - name: Cost Analysis | |
| # uses: overmindtech/cost-signals-action@v1 | |
| # continue-on-error: true | |
| # if: ${{ inputs.action == 'plan' || inputs.action == 'apply' }} | |
| # with: | |
| # overmind-api-key: ${{ secrets.OVM_API_KEY }} | |
| # infracost-api-key: ${{ secrets.INFRACOST_API_KEY }} | |
| # terraform-plan-json: ${{ env.WORKING_DIR }}/tfplan.json | |
| # ticket-link: ${{ steps.submit-plan.outputs.change-url }} | |
| - name: Start Overmind Change | |
| uses: overmindtech/actions/start-change@main | |
| continue-on-error: true | |
| if: ${{ inputs.action == 'apply' }} | |
| with: | |
| ovm-api-key: ${{ secrets.OVM_API_KEY }} | |
| - name: Terraform Apply | |
| id: apply | |
| if: ${{ inputs.action == 'apply' }} | |
| run: | | |
| echo "Applying scale test infrastructure (×${{ inputs.scale_multiplier }})" | |
| terraform apply \ | |
| -auto-approve \ | |
| -no-color \ | |
| -input=false \ | |
| -lock-timeout=5m \ | |
| tfplan | |
| echo "## Apply Complete" >> $GITHUB_STEP_SUMMARY | |
| echo "Scale test infrastructure deployed with multiplier ×${{ inputs.scale_multiplier }}" >> $GITHUB_STEP_SUMMARY | |
| - name: End Overmind Change | |
| uses: overmindtech/actions/end-change@main | |
| continue-on-error: true | |
| if: ${{ (inputs.action == 'apply') && (success() || failure() || cancelled()) }} | |
| with: | |
| ovm-api-key: ${{ secrets.OVM_API_KEY }} | |
| - name: Terraform Destroy | |
| id: destroy | |
| if: ${{ inputs.action == 'destroy' }} | |
| run: | | |
| echo "::warning::Destroying scale test infrastructure (×${{ inputs.scale_multiplier }})" | |
| terraform destroy \ | |
| -auto-approve \ | |
| -no-color \ | |
| -input=false \ | |
| -lock-timeout=10m | |
| echo "## Destroy Complete" >> $GITHUB_STEP_SUMMARY | |
| echo "Scale test infrastructure destroyed" >> $GITHUB_STEP_SUMMARY | |
| - name: Output Terraform Summary | |
| if: always() | |
| run: | | |
| echo "" | |
| echo "=== Scale Test Summary ===" | |
| echo "Action: ${{ inputs.action }}" | |
| echo "Multiplier: ${{ inputs.scale_multiplier }}" | |
| echo "Scenario: ${{ inputs.scenario }}" | |
| echo "Status: ${{ job.status }}" | |