# Scale Test Infrastructure
# GitHub Actions workflow file, as shown for workflow run #92.
name: Scale Test Infrastructure

# Manual trigger only: the workflow runs when someone dispatches it and
# fills in the inputs below.
on:
  workflow_dispatch:
    inputs:
      # Which Terraform operation to run; the job steps branch on this value.
      action:
        description: 'Action to perform'
        required: true
        type: choice
        options:
          - plan
          - apply
          - destroy
      # Exported to Terraform as TF_VAR_scale_multiplier (see `env`).
      # Kept as quoted strings so the choice values are not parsed as integers.
      scale_multiplier:
        description: 'Resource multiplier (1=175, 10=1740, 50=8700 resources)'
        required: true
        type: choice
        default: '1'
        options:
          - '1'
          - '5'
          - '10'
          - '25'
          - '50'
      # Exported to Terraform as TF_VAR_scenario; any value other than 'none'
      # also enables the scenario validation and PromptFoo eval steps.
      scenario:
        description: 'Test scenario (triggers specific risks in Overmind)'
        required: false
        type: choice
        default: 'none'
        options:
          - 'none'
          # AWS scenarios
          - 'lambda_timeout'
          - 'shared_sg_open'
          - 'vpc_peering_change'
          - 'central_sns_change'
          - 'combined_network'
          - 'combined_all'
          - 'combined_max'
          # GCP scenarios (requires cloud_provider=gcp or both)
          - 'shared_firewall_open'
          - 'central_pubsub_change'
          - 'gce_downgrade'
          - 'function_timeout'
          - 'combined_gcp_all'
      # Exported to Terraform as TF_VAR_cloud_provider.
      cloud_provider:
        description: 'Cloud provider to deploy'
        required: true
        type: choice
        default: 'aws'
        options:
          - 'aws'
          - 'gcp'
          - 'both'
      # Free-text safety catch, only checked when action is 'destroy'.
      confirmation:
        description: 'For destroy: type DESTROY-SCALE-TEST to confirm'
        required: false
        type: string
# Workflow-level environment, available to every job and step.
# Terraform reads each TF_VAR_<name> variable as the value of its input
# variable <name>, so the dispatch inputs reach the configuration this way.
env:
  TF_VAR_scale_multiplier: ${{ inputs.scale_multiplier }}
  TF_VAR_scenario: ${{ inputs.scenario }}
  TF_VAR_cloud_provider: ${{ inputs.cloud_provider }}
  # Directory (relative to the repository root) holding the Terraform code.
  WORKING_DIR: scale-test
jobs:
  # Gate job: when action is 'destroy' it fails unless the confirmation input
  # equals DESTROY-SCALE-TEST. For plan/apply its only step is skipped.
  validate:
    name: Validate Inputs
    runs-on: ubuntu-latest
    steps:
      - name: Validate destroy confirmation
        if: ${{ inputs.action == 'destroy' }}
        # `confirmation` is free text. It is handed to the script through the
        # environment instead of being expanded into the script source, so its
        # contents are only ever compared as data and never run as shell.
        env:
          CONFIRMATION: ${{ inputs.confirmation }}
        run: |
          if [ "$CONFIRMATION" != "DESTROY-SCALE-TEST" ]; then
            echo "::error::Destroy requires confirmation. Type 'DESTROY-SCALE-TEST' in the confirmation field."
            exit 1
          fi
          echo "Destroy confirmation validated"

  # Main job: terraform init, then plan/apply or destroy depending on
  # inputs.action. For plan/apply the plan is submitted to Overmind and the
  # reported risks are checked against the selected scenario.
  terraform:
    name: Terraform ${{ inputs.action }} (×${{ inputs.scale_multiplier }}, scenario=${{ inputs.scenario }})
    needs: validate
    runs-on: ubuntu-latest
    permissions:
      contents: read
      # Lets the job request an OIDC token; the AWS credentials step below
      # assumes a role and is given no static keys.
      id-token: write
      pull-requests: write
    # Only one run in this group executes at a time, and a run that is
    # already in progress is never cancelled.
    concurrency:
      group: scale-test-tfstate
      cancel-in-progress: false
    # Every `run` step starts in the Terraform directory unless it sets its
    # own working-directory.
    defaults:
      run:
        working-directory: ${{ env.WORKING_DIR }}
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Setup Terraform
        uses: hashicorp/setup-terraform@v3
        with:
          # NOTE(review): presumably disabled so that redirected output such as
          # `terraform show -json tfplan > tfplan.json` is the raw CLI output.
          terraform_wrapper: false

      - name: Configure AWS Credentials
        uses: aws-actions/configure-aws-credentials@v4
        with:
          audience: sts.amazonaws.com
          aws-region: us-east-1
          role-to-assume: ${{ vars.TERRAFORM_DEPLOY_ROLE }}

      # Always authenticate with GCP - Terraform validates all providers during init
      # even when cloud_provider=aws (no GCP resources created)
      - name: Configure GCP Credentials
        uses: google-github-actions/auth@v2
        with:
          credentials_json: ${{ secrets.OVERMIND_SCALE_TEST }}

      - name: Setup GCP SDK
        uses: google-github-actions/setup-gcloud@v2
        with:
          project_id: overmind-scale-test

      - name: Terraform Init
        id: init
        run: |
          terraform version
          terraform init -input=false

      # Produces three files in the working directory: tfplan (binary plan,
      # reused by the apply step), terraform_plan.log and tfplan.json.
      - name: Terraform Plan
        id: plan
        if: ${{ inputs.action == 'plan' || inputs.action == 'apply' }}
        run: |
          # Without pipefail the `terraform plan | tee` pipeline would report
          # tee's exit status and hide a failed plan.
          set -o pipefail
          echo "Planning with scale_multiplier=${{ inputs.scale_multiplier }}, scenario=${{ inputs.scenario }}"
          terraform plan \
            -compact-warnings \
            -no-color \
            -input=false \
            -lock-timeout=5m \
            -out=tfplan 2>&1 | tee terraform_plan.log
          # Generate JSON plan for Overmind
          terraform show -json tfplan > tfplan.json
          # Output summary
          echo "## Terraform Plan Summary" >> $GITHUB_STEP_SUMMARY
          echo "- **Scale Multiplier:** ${{ inputs.scale_multiplier }}" >> $GITHUB_STEP_SUMMARY
          echo "- **Scenario:** ${{ inputs.scenario }}" >> $GITHUB_STEP_SUMMARY
          echo "- **Action:** ${{ inputs.action }}" >> $GITHUB_STEP_SUMMARY
          echo "" >> $GITHUB_STEP_SUMMARY
          echo "### Resource Changes" >> $GITHUB_STEP_SUMMARY
          # `|| true`: a missing summary line must not fail the step.
          terraform show -no-color tfplan | grep -E "^(Plan:|No changes)" >> $GITHUB_STEP_SUMMARY || true

      # NOTE(review): the overmindtech actions are referenced by the mutable
      # `main` branch; consider pinning them to a release tag or commit SHA.
      - name: Install Overmind CLI
        uses: overmindtech/actions/install-cli@main
        id: install-cli
        with:
          version: latest
          github-token: ${{ secrets.GITHUB_TOKEN }}

      # Best effort (continue-on-error): later steps check the change-url
      # output before relying on the submission.
      - name: Submit Plan to Overmind
        uses: overmindtech/actions/submit-plan@main
        continue-on-error: true
        id: submit-plan
        if: ${{ inputs.action == 'plan' || inputs.action == 'apply' }}
        with:
          ovm-api-key: ${{ secrets.OVM_API_KEY }}
          plan-json: ${{ env.WORKING_DIR }}/tfplan.json
          tags: 'model=risks_v6'

      # =========================================================================
      # Quality Evaluation Steps
      # Capture analysis results and validate expected risks for each scenario
      # =========================================================================
      # Writes change-results.json and exposes risk_count, high_risk_count and
      # medium_risk_count as step outputs.
      - name: Get change results as JSON
        id: get-results
        if: ${{ steps.submit-plan.outputs.change-url != '' && (inputs.action == 'plan' || inputs.action == 'apply') }}
        continue-on-error: true
        env:
          OVM_API_KEY: ${{ secrets.OVM_API_KEY }}
        run: |
          echo "Fetching change results from: ${{ steps.submit-plan.outputs.change-url }}"
          # Verify overmind CLI is available
          if ! command -v overmind &> /dev/null; then
            echo "::error::Overmind CLI not found. Install step may have failed."
            exit 1
          fi
          # Get the full change analysis as JSON
          overmind changes get-change \
            --change "${{ steps.submit-plan.outputs.change-url }}" \
            --format json \
            > change-results.json
          # Extract key metrics for assertions
          RISK_COUNT=$(jq '.risks | length // 0' change-results.json)
          HIGH_RISK_COUNT=$(jq '[.risks[]? | select(.severity == "high" or .severity == "critical")] | length' change-results.json)
          MEDIUM_RISK_COUNT=$(jq '[.risks[]? | select(.severity == "medium")] | length' change-results.json)
          echo "risk_count=$RISK_COUNT" >> $GITHUB_OUTPUT
          echo "high_risk_count=$HIGH_RISK_COUNT" >> $GITHUB_OUTPUT
          echo "medium_risk_count=$MEDIUM_RISK_COUNT" >> $GITHUB_OUTPUT
          echo "## Change Analysis Results" >> $GITHUB_STEP_SUMMARY
          echo "- **Total Risks:** $RISK_COUNT" >> $GITHUB_STEP_SUMMARY
          echo "- **High/Critical Risks:** $HIGH_RISK_COUNT" >> $GITHUB_STEP_SUMMARY
          echo "- **Medium Risks:** $MEDIUM_RISK_COUNT" >> $GITHUB_STEP_SUMMARY
          echo "" >> $GITHUB_STEP_SUMMARY
          # List all risks in summary
          echo "### Detected Risks" >> $GITHUB_STEP_SUMMARY
          if [ "$RISK_COUNT" -gt 0 ]; then
            jq -r '.risks[]? | "- **[\(.severity)]** \(.title)"' change-results.json >> $GITHUB_STEP_SUMMARY
          else
            echo "_No risks detected_" >> $GITHUB_STEP_SUMMARY
          fi

      # Fails the job (exit 1) when the risks reported for the chosen scenario
      # do not meet that scenario's expectation.
      - name: Validate scenario results
        id: validate-scenario
        if: ${{ steps.get-results.outcome == 'success' && inputs.scenario != 'none' }}
        run: |
          SCENARIO="${{ inputs.scenario }}"
          RISK_COUNT="${{ steps.get-results.outputs.risk_count }}"
          HIGH_RISK_COUNT="${{ steps.get-results.outputs.high_risk_count }}"
          MEDIUM_RISK_COUNT="${{ steps.get-results.outputs.medium_risk_count }}"
          echo ""
          echo "=========================================="
          echo "Validating results for scenario: $SCENARIO"
          echo "=========================================="
          echo "Total risks found: $RISK_COUNT"
          echo "High/Critical risks: $HIGH_RISK_COUNT"
          echo "Medium risks: $MEDIUM_RISK_COUNT"
          echo ""
          VALIDATION_PASSED=true
          VALIDATION_MESSAGES=""
          # NOTE(review): central_pubsub_change, gce_downgrade and
          # combined_gcp_all have no check below, so they always report
          # PASSED - confirm that this is intended.
          # -----------------------------------------------------------------------
          # shared_sg_open: SSH open to 0.0.0.0/0 - expect HIGH/CRITICAL risk
          # -----------------------------------------------------------------------
          if [[ "$SCENARIO" == "shared_sg_open" ]]; then
            echo "Testing: shared_sg_open - expect high/critical security risk"
            if [ "$HIGH_RISK_COUNT" -eq 0 ]; then
              echo "❌ FAIL: Expected high-severity risk for SSH-to-internet scenario"
              VALIDATION_PASSED=false
              VALIDATION_MESSAGES="Expected high-severity SSH risk not found"
            else
              echo "✅ PASS: Found $HIGH_RISK_COUNT high-severity risk(s) as expected"
            fi
          fi
          # -----------------------------------------------------------------------
          # lambda_timeout: Timeout reduced to 1s - expect MEDIUM risk
          # -----------------------------------------------------------------------
          if [[ "$SCENARIO" == "lambda_timeout" ]]; then
            echo "Testing: lambda_timeout - expect timeout/reliability risk"
            TIMEOUT_RISK=$(jq '[.risks[]? | select(.title | test("timeout|reliability|function"; "i"))] | length' change-results.json)
            # NOTE(review): TIMEOUT_RISK counts a subset of all risks, so this
            # condition only fails when no risk at all was reported.
            if [ "$TIMEOUT_RISK" -eq 0 ] && [ "$RISK_COUNT" -eq 0 ]; then
              echo "❌ FAIL: Expected timeout-related risk"
              VALIDATION_PASSED=false
              VALIDATION_MESSAGES="Expected timeout risk not found"
            else
              echo "✅ PASS: Found risk(s) for timeout scenario"
            fi
          fi
          # -----------------------------------------------------------------------
          # vpc_peering_change: DNS resolution enabled - expect analysis (may or may not be risk)
          # -----------------------------------------------------------------------
          if [[ "$SCENARIO" == "vpc_peering_change" ]]; then
            echo "Testing: vpc_peering_change - expect network analysis"
            if [ "$RISK_COUNT" -eq 0 ]; then
              echo "⚠️ INFO: No risks found for VPC peering change (may be expected)"
            else
              echo "✅ PASS: Found $RISK_COUNT risk(s) for VPC peering change"
            fi
            # VPC peering is ambiguous, so we don't fail on no risks
          fi
          # -----------------------------------------------------------------------
          # central_sns_change: SNS topic policy change - expect HIGH risk
          # -----------------------------------------------------------------------
          if [[ "$SCENARIO" == "central_sns_change" ]]; then
            echo "Testing: central_sns_change - expect SNS policy risk"
            if [ "$HIGH_RISK_COUNT" -eq 0 ] && [ "$MEDIUM_RISK_COUNT" -eq 0 ]; then
              echo "❌ FAIL: Expected risk for SNS topic policy change"
              VALIDATION_PASSED=false
              VALIDATION_MESSAGES="Expected SNS policy risk not found"
            else
              echo "✅ PASS: Found risk(s) for SNS change"
            fi
          fi
          # -----------------------------------------------------------------------
          # combined_network: Multiple network/security changes - expect HIGH/CRITICAL
          # -----------------------------------------------------------------------
          if [[ "$SCENARIO" == "combined_network" || "$SCENARIO" == "combined_all" || "$SCENARIO" == "combined_max" ]]; then
            echo "Testing: $SCENARIO - expect multiple high-severity risks"
            if [ "$HIGH_RISK_COUNT" -eq 0 ]; then
              echo "❌ FAIL: Expected high-severity risks for combined scenario"
              VALIDATION_PASSED=false
              VALIDATION_MESSAGES="Expected high-severity risks for combined scenario not found"
            else
              echo "✅ PASS: Found $HIGH_RISK_COUNT high-severity risk(s)"
            fi
          fi
          # -----------------------------------------------------------------------
          # GCP: shared_firewall_open - expect HIGH/CRITICAL risk
          # -----------------------------------------------------------------------
          if [[ "$SCENARIO" == "shared_firewall_open" ]]; then
            echo "Testing: shared_firewall_open - expect high/critical security risk"
            if [ "$HIGH_RISK_COUNT" -eq 0 ]; then
              echo "❌ FAIL: Expected high-severity risk for firewall-to-internet scenario"
              VALIDATION_PASSED=false
              VALIDATION_MESSAGES="Expected high-severity firewall risk not found"
            else
              echo "✅ PASS: Found $HIGH_RISK_COUNT high-severity risk(s)"
            fi
          fi
          # -----------------------------------------------------------------------
          # GCP: function_timeout - expect MEDIUM risk
          # -----------------------------------------------------------------------
          if [[ "$SCENARIO" == "function_timeout" ]]; then
            echo "Testing: function_timeout - expect timeout risk"
            if [ "$RISK_COUNT" -eq 0 ]; then
              echo "❌ FAIL: Expected timeout-related risk for Cloud Function"
              VALIDATION_PASSED=false
              VALIDATION_MESSAGES="Expected Cloud Function timeout risk not found"
            else
              echo "✅ PASS: Found risk(s) for function timeout scenario"
            fi
          fi
          echo ""
          echo "=========================================="
          # Write validation results to summary
          echo "" >> $GITHUB_STEP_SUMMARY
          echo "### Scenario Validation: \`$SCENARIO\`" >> $GITHUB_STEP_SUMMARY
          if [ "$VALIDATION_PASSED" = true ]; then
            echo "✅ **PASSED** - Expected risks were detected" >> $GITHUB_STEP_SUMMARY
            echo "validation_passed=true" >> $GITHUB_OUTPUT
          else
            echo "❌ **FAILED** - $VALIDATION_MESSAGES" >> $GITHUB_STEP_SUMMARY
            echo "validation_passed=false" >> $GITHUB_OUTPUT
            echo "::error::Scenario validation failed: $VALIDATION_MESSAGES"
            exit 1
          fi

      # always(): the artifacts are uploaded even when an earlier step failed.
      - name: Upload analysis results
        uses: actions/upload-artifact@v4
        if: ${{ always() && (inputs.action == 'plan' || inputs.action == 'apply') }}
        with:
          name: change-analysis-${{ inputs.scenario }}-${{ github.run_id }}
          path: |
            ${{ env.WORKING_DIR }}/change-results.json
            ${{ env.WORKING_DIR }}/tfplan.json
          if-no-files-found: ignore

      # =========================================================================
      # PromptFoo Quality Evals
      # Run LLM-as-judge evaluation on the risk analysis quality
      # =========================================================================
      - name: Setup Node.js for PromptFoo
        uses: actions/setup-node@v4
        if: ${{ steps.get-results.outcome == 'success' && inputs.scenario != 'none' }}
        with:
          node-version: '20'

      - name: Install PromptFoo dependencies
        if: ${{ steps.get-results.outcome == 'success' && inputs.scenario != 'none' }}
        working-directory: ${{ env.WORKING_DIR }}/evals
        run: npm install

      # Best effort (continue-on-error): a failing eval does not fail the job.
      - name: Run PromptFoo quality evals
        id: promptfoo-eval
        if: ${{ steps.get-results.outcome == 'success' && inputs.scenario != 'none' }}
        continue-on-error: true
        working-directory: ${{ env.WORKING_DIR }}/evals
        env:
          OPENAI_API_KEY: ${{ secrets.OPENAI_KEY }}
        run: |
          echo "Running PromptFoo evals for scenario: ${{ inputs.scenario }}"
          # Run evals with the change results file
          npx promptfoo eval \
            --vars "results_file=../change-results.json,scenario=${{ inputs.scenario }}" \
            --filter-description "${{ inputs.scenario }}" \
            --output eval-results.json \
            --no-cache
          # Extract summary metrics
          if [ -f eval-results.json ]; then
            PASS_COUNT=$(jq '[.results[].success] | map(select(. == true)) | length' eval-results.json)
            TOTAL_COUNT=$(jq '[.results[].success] | length' eval-results.json)
            echo "" >> $GITHUB_STEP_SUMMARY
            echo "### PromptFoo Quality Eval" >> $GITHUB_STEP_SUMMARY
            echo "- **Passed:** $PASS_COUNT / $TOTAL_COUNT assertions" >> $GITHUB_STEP_SUMMARY
            # Show any failures
            jq -r '.results[] | select(.success == false) | "- ❌ \(.description): \(.error // "assertion failed")"' eval-results.json >> $GITHUB_STEP_SUMMARY || true
          fi

      - name: Upload PromptFoo results
        uses: actions/upload-artifact@v4
        if: ${{ steps.promptfoo-eval.outcome != 'skipped' }}
        with:
          name: promptfoo-results-${{ inputs.scenario }}-${{ github.run_id }}
          path: ${{ env.WORKING_DIR }}/evals/eval-results.json
          if-no-files-found: ignore

      # Cost Analysis disabled for scale testing (plan too large)
      # - name: Cost Analysis
      #   uses: overmindtech/cost-signals-action@v1
      #   continue-on-error: true
      #   if: ${{ inputs.action == 'plan' || inputs.action == 'apply' }}
      #   with:
      #     overmind-api-key: ${{ secrets.OVM_API_KEY }}
      #     infracost-api-key: ${{ secrets.INFRACOST_API_KEY }}
      #     terraform-plan-json: ${{ env.WORKING_DIR }}/tfplan.json
      #     ticket-link: ${{ steps.submit-plan.outputs.change-url }}

      - name: Start Overmind Change
        uses: overmindtech/actions/start-change@main
        continue-on-error: true
        if: ${{ inputs.action == 'apply' }}
        with:
          ovm-api-key: ${{ secrets.OVM_API_KEY }}

      # Applies the exact plan file written by the plan step.
      - name: Terraform Apply
        id: apply
        if: ${{ inputs.action == 'apply' }}
        run: |
          echo "Applying scale test infrastructure (×${{ inputs.scale_multiplier }})"
          terraform apply \
            -auto-approve \
            -no-color \
            -input=false \
            -lock-timeout=5m \
            tfplan
          echo "## Apply Complete" >> $GITHUB_STEP_SUMMARY
          echo "Scale test infrastructure deployed with multiplier ×${{ inputs.scale_multiplier }}" >> $GITHUB_STEP_SUMMARY

      # The explicit status functions replace the implicit success() check, so
      # this step also runs after a failed or cancelled apply.
      - name: End Overmind Change
        uses: overmindtech/actions/end-change@main
        continue-on-error: true
        if: ${{ (inputs.action == 'apply') && (success() || failure() || cancelled()) }}
        with:
          ovm-api-key: ${{ secrets.OVM_API_KEY }}

      - name: Terraform Destroy
        id: destroy
        if: ${{ inputs.action == 'destroy' }}
        run: |
          echo "::warning::Destroying scale test infrastructure (×${{ inputs.scale_multiplier }})"
          terraform destroy \
            -auto-approve \
            -no-color \
            -input=false \
            -lock-timeout=10m
          echo "## Destroy Complete" >> $GITHUB_STEP_SUMMARY
          echo "Scale test infrastructure destroyed" >> $GITHUB_STEP_SUMMARY

      # Always printed, whatever the outcome of the steps above.
      - name: Output Terraform Summary
        if: always()
        run: |
          echo ""
          echo "=== Scale Test Summary ==="
          echo "Action: ${{ inputs.action }}"
          echo "Multiplier: ${{ inputs.scale_multiplier }}"
          echo "Scenario: ${{ inputs.scenario }}"
          echo "Status: ${{ job.status }}"