Scale Test Infrastructure #92

Workflow file for this run

.github/workflows/scale-test.yml at d6bcd0c

	# Manually dispatched workflow for the scale-test Terraform stack.
	name: Scale Test Infrastructure

	on:
	  workflow_dispatch:
	    inputs:
	      # Which Terraform operation the run performs.
	      action:
	        description: 'Action to perform'
	        type: choice
	        required: true
	        options:
	          - plan
	          - apply
	          - destroy
	      # Exported to Terraform as TF_VAR_scale_multiplier.
	      scale_multiplier:
	        description: 'Resource multiplier (1=175, 10=1740, 50=8700 resources)'
	        type: choice
	        required: true
	        default: '1'
	        options:
	          - '1'
	          - '5'
	          - '10'
	          - '25'
	          - '50'
	      # Exported to Terraform as TF_VAR_scenario; 'none' skips the
	      # scenario validation and PromptFoo steps.
	      scenario:
	        description: 'Test scenario (triggers specific risks in Overmind)'
	        type: choice
	        required: false
	        default: 'none'
	        options:
	          - 'none'
	          # AWS scenarios
	          - 'lambda_timeout'
	          - 'shared_sg_open'
	          - 'vpc_peering_change'
	          - 'central_sns_change'
	          - 'combined_network'
	          - 'combined_all'
	          - 'combined_max'
	          # GCP scenarios (requires cloud_provider=gcp or both)
	          - 'shared_firewall_open'
	          - 'central_pubsub_change'
	          - 'gce_downgrade'
	          - 'function_timeout'
	          - 'combined_gcp_all'
	      # Exported to Terraform as TF_VAR_cloud_provider.
	      cloud_provider:
	        description: 'Cloud provider to deploy'
	        type: choice
	        required: true
	        default: 'aws'
	        options:
	          - 'aws'
	          - 'gcp'
	          - 'both'
	      # Free-text safety phrase; only checked when action is 'destroy'.
	      confirmation:
	        description: 'For destroy: type DESTROY-SCALE-TEST to confirm'
	        type: string
	        required: false

	# Workflow-level environment shared by every job and step.
	# Terraform picks up TF_VAR_<name> variables as values for its input
	# variables, so the three dispatch inputs reach the configuration this way.
	env:
	  WORKING_DIR: scale-test
	  TF_VAR_scale_multiplier: ${{ inputs.scale_multiplier }}
	  TF_VAR_scenario: ${{ inputs.scenario }}
	  TF_VAR_cloud_provider: ${{ inputs.cloud_provider }}

	jobs:
	  # Gate job: when the requested action is 'destroy', fail unless the
	  # confirmation phrase matches exactly. For other actions the only step is
	  # skipped and the job succeeds.
	  validate:
	    name: Validate Inputs
	    runs-on: ubuntu-latest
	    steps:
	      - name: Validate destroy confirmation
	        if: ${{ inputs.action == 'destroy' }}
	        # The free-text input is handed to the script through the
	        # environment rather than interpolated into it, so its content is
	        # compared as data and never parsed as shell code.
	        env:
	          CONFIRMATION: ${{ inputs.confirmation }}
	        run: |
	          if [ "$CONFIRMATION" != "DESTROY-SCALE-TEST" ]; then
	            echo "::error::Destroy requires confirmation. Type 'DESTROY-SCALE-TEST' in the confirmation field."
	            exit 1
	          fi
	          echo "Destroy confirmation validated"

	# Main job: runs the requested Terraform action after input validation.
	terraform:
	  name: Terraform ${{ inputs.action }} (×${{ inputs.scale_multiplier }}, scenario=${{ inputs.scenario }})
	  runs-on: ubuntu-latest
	  needs: validate
	  # NOTE(review): id-token: write is presumably what the role-based AWS
	  # credentials step relies on (OIDC) - confirm before tightening.
	  permissions:
	    contents: read
	    id-token: write
	    pull-requests: write
	  # All runs share one concurrency group and in-progress runs are never
	  # cancelled, so a newer run waits instead of interrupting an older one.
	  concurrency:
	    group: scale-test-tfstate
	    cancel-in-progress: false

	  # Every `run` step starts in the Terraform working directory unless it
	  # sets its own working-directory.
	  defaults:
	    run:
	      working-directory: ${{ env.WORKING_DIR }}

	  steps:
	# Fetch the repository contents into the runner workspace.
	- name: Checkout repository
	  uses: actions/checkout@v4

	# Install Terraform with the action's wrapper script disabled.
	- name: Setup Terraform
	  uses: hashicorp/setup-terraform@v3
	  with:
	    terraform_wrapper: false

	# Assume the deploy role named by the TERRAFORM_DEPLOY_ROLE repository
	# variable; no static access keys are passed to the action.
	- name: Configure AWS Credentials
	  uses: aws-actions/configure-aws-credentials@v4
	  with:
	    role-to-assume: ${{ vars.TERRAFORM_DEPLOY_ROLE }}
	    aws-region: us-east-1
	    audience: sts.amazonaws.com

	# Always authenticate with GCP - Terraform validates all providers during init
	# even when cloud_provider=aws (no GCP resources created)
	- name: Configure GCP Credentials
	  uses: google-github-actions/auth@v2
	  with:
	    credentials_json: ${{ secrets.OVERMIND_SCALE_TEST }}

	# Install the gcloud SDK configured for the scale-test project.
	- name: Setup GCP SDK
	  uses: google-github-actions/setup-gcloud@v2
	  with:
	    project_id: overmind-scale-test

	# Print the Terraform version for the log, then initialise the working
	# directory without prompting for input.
	- name: Terraform Init
	  id: init
	  run: |
	    terraform version
	    terraform init -input=false

	# Produces the saved plan (tfplan), its JSON rendering (tfplan.json), a log
	# of the plan output (terraform_plan.log) and a job-summary section.
	# Runs for both 'plan' and 'apply'; 'apply' later consumes the saved plan.
	- name: Terraform Plan
	  id: plan
	  if: ${{ inputs.action == 'plan' || inputs.action == 'apply' }}
	  run: |
	    # pipefail keeps a failing `terraform plan` from being masked by `tee`.
	    set -o pipefail
	    echo "Planning with scale_multiplier=${{ inputs.scale_multiplier }}, scenario=${{ inputs.scenario }}"
	    terraform plan \
	      -compact-warnings \
	      -no-color \
	      -input=false \
	      -lock-timeout=5m \
	      -out=tfplan 2>&1 | tee terraform_plan.log

	    # Generate JSON plan for Overmind
	    terraform show -json tfplan > tfplan.json

	    # Output summary
	    {
	      echo "## Terraform Plan Summary"
	      echo "- Scale Multiplier: ${{ inputs.scale_multiplier }}"
	      echo "- Scenario: ${{ inputs.scenario }}"
	      echo "- Action: ${{ inputs.action }}"
	      echo ""
	      echo "### Resource Changes"
	      # grep exits non-zero when neither line is present; that must not
	      # fail the step.
	      terraform show -no-color tfplan | grep -E "^(Plan:|No changes)" || true
	    } >> "$GITHUB_STEP_SUMMARY"

	# Installs the Overmind CLI used by the later result-fetching step.
	# NOTE(review): both the action ref (`main`) and the CLI version (`latest`)
	# float, so two runs of the same commit may use different tooling -
	# consider pinning.
	- name: Install Overmind CLI
	  id: install-cli
	  uses: overmindtech/actions/install-cli@main
	  with:
	    github-token: ${{ secrets.GITHUB_TOKEN }}
	    version: latest

	# Sends the JSON plan to Overmind. Best-effort: a failure here does not
	# fail the job, and the later analysis steps are skipped when no change URL
	# is returned.
	- name: Submit Plan to Overmind
	  id: submit-plan
	  if: ${{ inputs.action == 'plan' || inputs.action == 'apply' }}
	  continue-on-error: true
	  uses: overmindtech/actions/submit-plan@main
	  with:
	    ovm-api-key: ${{ secrets.OVM_API_KEY }}
	    plan-json: ${{ env.WORKING_DIR }}/tfplan.json
	    tags: 'model=risks_v6'

	# =========================================================================
	# Quality Evaluation Steps
	# Capture analysis results and validate expected risks for each scenario
	# =========================================================================

	# Downloads the change analysis into change-results.json, publishes the
	# risk counts as step outputs (risk_count, high_risk_count,
	# medium_risk_count) and lists the risks in the job summary.
	- name: Get change results as JSON
	  id: get-results
	  if: ${{ steps.submit-plan.outputs.change-url != '' && (inputs.action == 'plan' || inputs.action == 'apply') }}
	  continue-on-error: true
	  env:
	    OVM_API_KEY: ${{ secrets.OVM_API_KEY }}
	    # Passed through the environment so the action-provided URL is treated
	    # as data instead of being spliced into the script text.
	    CHANGE_URL: ${{ steps.submit-plan.outputs.change-url }}
	  run: |
	    echo "Fetching change results from: $CHANGE_URL"

	    # Verify overmind CLI is available
	    if ! command -v overmind &> /dev/null; then
	      echo "::error::Overmind CLI not found. Install step may have failed."
	      exit 1
	    fi

	    # Get the full change analysis as JSON
	    overmind changes get-change \
	      --change "$CHANGE_URL" \
	      --format json \
	      > change-results.json

	    # Extract key metrics for assertions (a missing .risks counts as 0)
	    RISK_COUNT=$(jq '(.risks // []) | length' change-results.json)
	    HIGH_RISK_COUNT=$(jq '[.risks[]? | select(.severity == "high" or .severity == "critical")] | length' change-results.json)
	    MEDIUM_RISK_COUNT=$(jq '[.risks[]? | select(.severity == "medium")] | length' change-results.json)

	    {
	      echo "risk_count=$RISK_COUNT"
	      echo "high_risk_count=$HIGH_RISK_COUNT"
	      echo "medium_risk_count=$MEDIUM_RISK_COUNT"
	    } >> "$GITHUB_OUTPUT"

	    {
	      echo "## Change Analysis Results"
	      echo "- Total Risks: $RISK_COUNT"
	      echo "- High/Critical Risks: $HIGH_RISK_COUNT"
	      echo "- Medium Risks: $MEDIUM_RISK_COUNT"
	      echo ""

	      # List all risks in summary
	      echo "### Detected Risks"
	      if [ "$RISK_COUNT" -gt 0 ]; then
	        jq -r '.risks[]? | "- [\(.severity)] \(.title)"' change-results.json
	      else
	        echo "_No risks detected_"
	      fi
	    } >> "$GITHUB_STEP_SUMMARY"

	# Compares the risk counts published by the get-results step against the
	# expectation for the selected scenario. Fails the step (exit 1) when an
	# expectation is not met and publishes `validation_passed` as a step output.
	# Scenarios without a rule are reported as skipped rather than as passed.
	- name: Validate scenario results
	  id: validate-scenario
	  if: ${{ steps.get-results.outcome == 'success' && inputs.scenario != 'none' }}
	  env:
	    SCENARIO: ${{ inputs.scenario }}
	    RISK_COUNT: ${{ steps.get-results.outputs.risk_count }}
	    HIGH_RISK_COUNT: ${{ steps.get-results.outputs.high_risk_count }}
	    MEDIUM_RISK_COUNT: ${{ steps.get-results.outputs.medium_risk_count }}
	  run: |
	    echo ""
	    echo "=========================================="
	    echo "Validating results for scenario: $SCENARIO"
	    echo "=========================================="
	    echo "Total risks found: $RISK_COUNT"
	    echo "High/Critical risks: $HIGH_RISK_COUNT"
	    echo "Medium risks: $MEDIUM_RISK_COUNT"
	    echo ""

	    VALIDATION_PASSED=true
	    VALIDATION_CHECKED=true
	    VALIDATION_MESSAGES=""

	    # Log a failed expectation ($1) and remember the summary reason ($2).
	    fail() {
	      echo "❌ FAIL: $1"
	      VALIDATION_PASSED=false
	      VALIDATION_MESSAGES="$2"
	    }

	    case "$SCENARIO" in
	      # ---------------------------------------------------------------------
	      # shared_sg_open: SSH open to 0.0.0.0/0 - expect HIGH/CRITICAL risk
	      # ---------------------------------------------------------------------
	      shared_sg_open)
	        echo "Testing: shared_sg_open - expect high/critical security risk"
	        if [ "$HIGH_RISK_COUNT" -eq 0 ]; then
	          fail "Expected high-severity risk for SSH-to-internet scenario" \
	               "Expected high-severity SSH risk not found"
	        else
	          echo "✅ PASS: Found $HIGH_RISK_COUNT high-severity risk(s) as expected"
	        fi
	        ;;

	      # ---------------------------------------------------------------------
	      # lambda_timeout: Timeout reduced to 1s - expect MEDIUM risk
	      # ---------------------------------------------------------------------
	      lambda_timeout)
	        echo "Testing: lambda_timeout - expect timeout/reliability risk"
	        # A risk without a title is treated as an empty title so the regex
	        # match cannot abort the script.
	        TIMEOUT_RISK=$(jq '[.risks[]? | select((.title // "") | test("timeout|reliability|function"; "i"))] | length' change-results.json)
	        # NOTE(review): title matches are a subset of all risks, so this only
	        # fails when there are no risks at all - tighten it if a
	        # timeout-specific title should be required.
	        if [ "$TIMEOUT_RISK" -eq 0 ] && [ "$RISK_COUNT" -eq 0 ]; then
	          fail "Expected timeout-related risk" \
	               "Expected timeout risk not found"
	        else
	          echo "✅ PASS: Found risk(s) for timeout scenario"
	        fi
	        ;;

	      # ---------------------------------------------------------------------
	      # vpc_peering_change: DNS resolution enabled - expect analysis
	      # (may or may not be risk)
	      # ---------------------------------------------------------------------
	      vpc_peering_change)
	        echo "Testing: vpc_peering_change - expect network analysis"
	        # VPC peering is ambiguous, so we don't fail on no risks
	        if [ "$RISK_COUNT" -eq 0 ]; then
	          echo "⚠️ INFO: No risks found for VPC peering change (may be expected)"
	        else
	          echo "✅ PASS: Found $RISK_COUNT risk(s) for VPC peering change"
	        fi
	        ;;

	      # ---------------------------------------------------------------------
	      # central_sns_change: SNS topic policy change - expect HIGH risk
	      # ---------------------------------------------------------------------
	      central_sns_change)
	        echo "Testing: central_sns_change - expect SNS policy risk"
	        if [ "$HIGH_RISK_COUNT" -eq 0 ] && [ "$MEDIUM_RISK_COUNT" -eq 0 ]; then
	          fail "Expected risk for SNS topic policy change" \
	               "Expected SNS policy risk not found"
	        else
	          echo "✅ PASS: Found risk(s) for SNS change"
	        fi
	        ;;

	      # ---------------------------------------------------------------------
	      # combined_*: Multiple network/security changes - expect HIGH/CRITICAL
	      # ---------------------------------------------------------------------
	      combined_network | combined_all | combined_max)
	        echo "Testing: $SCENARIO - expect multiple high-severity risks"
	        if [ "$HIGH_RISK_COUNT" -eq 0 ]; then
	          fail "Expected high-severity risks for combined scenario" \
	               "Expected high-severity risks for combined scenario not found"
	        else
	          echo "✅ PASS: Found $HIGH_RISK_COUNT high-severity risk(s)"
	        fi
	        ;;

	      # ---------------------------------------------------------------------
	      # GCP: shared_firewall_open - expect HIGH/CRITICAL risk
	      # ---------------------------------------------------------------------
	      shared_firewall_open)
	        echo "Testing: shared_firewall_open - expect high/critical security risk"
	        if [ "$HIGH_RISK_COUNT" -eq 0 ]; then
	          fail "Expected high-severity risk for firewall-to-internet scenario" \
	               "Expected high-severity firewall risk not found"
	        else
	          echo "✅ PASS: Found $HIGH_RISK_COUNT high-severity risk(s)"
	        fi
	        ;;

	      # ---------------------------------------------------------------------
	      # GCP: function_timeout - expect MEDIUM risk
	      # ---------------------------------------------------------------------
	      function_timeout)
	        echo "Testing: function_timeout - expect timeout risk"
	        if [ "$RISK_COUNT" -eq 0 ]; then
	          fail "Expected timeout-related risk for Cloud Function" \
	               "Expected Cloud Function timeout risk not found"
	        else
	          echo "✅ PASS: Found risk(s) for function timeout scenario"
	        fi
	        ;;

	      # ---------------------------------------------------------------------
	      # Any scenario without a rule above: nothing was checked, so do not
	      # claim that expected risks were detected.
	      # ---------------------------------------------------------------------
	      *)
	        VALIDATION_CHECKED=false
	        echo "::warning::No validation rule is defined for scenario '$SCENARIO'; results were not checked."
	        ;;
	    esac

	    echo ""
	    echo "=========================================="

	    # Write validation results to summary
	    echo "" >> "$GITHUB_STEP_SUMMARY"
	    echo "### Scenario Validation: \`$SCENARIO\`" >> "$GITHUB_STEP_SUMMARY"

	    if [ "$VALIDATION_PASSED" != true ]; then
	      echo "❌ FAILED - $VALIDATION_MESSAGES" >> "$GITHUB_STEP_SUMMARY"
	      echo "validation_passed=false" >> "$GITHUB_OUTPUT"
	      echo "::error::Scenario validation failed: $VALIDATION_MESSAGES"
	      exit 1
	    elif [ "$VALIDATION_CHECKED" != true ]; then
	      # The output stays 'true' (nothing failed), as it was before this
	      # scenario was reported as skipped.
	      echo "⚠️ SKIPPED - No validation rule is defined for this scenario" >> "$GITHUB_STEP_SUMMARY"
	      echo "validation_passed=true" >> "$GITHUB_OUTPUT"
	    else
	      echo "✅ PASSED - Expected risks were detected" >> "$GITHUB_STEP_SUMMARY"
	      echo "validation_passed=true" >> "$GITHUB_OUTPUT"
	    fi

	# Keeps the analysis JSON and the JSON plan as a run artifact. always()
	# makes the upload happen even after an earlier step failed; missing files
	# are ignored.
	- name: Upload analysis results
	  if: ${{ always() && (inputs.action == 'plan' || inputs.action == 'apply') }}
	  uses: actions/upload-artifact@v4
	  with:
	    name: change-analysis-${{ inputs.scenario }}-${{ github.run_id }}
	    path: |
	      ${{ env.WORKING_DIR }}/change-results.json
	      ${{ env.WORKING_DIR }}/tfplan.json
	    if-no-files-found: ignore

	# =========================================================================
	# PromptFoo Quality Evals
	# Run LLM-as-judge evaluation on the risk analysis quality
	# =========================================================================

	# Both steps run only when analysis results were fetched and a scenario
	# other than 'none' was selected.
	- name: Setup Node.js for PromptFoo
	  if: ${{ steps.get-results.outcome == 'success' && inputs.scenario != 'none' }}
	  uses: actions/setup-node@v4
	  with:
	    node-version: '20'

	# Installs the packages declared in the evals directory.
	- name: Install PromptFoo dependencies
	  if: ${{ steps.get-results.outcome == 'success' && inputs.scenario != 'none' }}
	  working-directory: ${{ env.WORKING_DIR }}/evals
	  run: npm install

	# Runs the PromptFoo evals for the selected scenario and writes a pass/fail
	# digest to the job summary. Best-effort: continue-on-error keeps a failing
	# eval from failing the job.
	- name: Run PromptFoo quality evals
	  id: promptfoo-eval
	  if: ${{ steps.get-results.outcome == 'success' && inputs.scenario != 'none' }}
	  continue-on-error: true
	  working-directory: ${{ env.WORKING_DIR }}/evals
	  env:
	    OPENAI_API_KEY: ${{ secrets.OPENAI_KEY }}
	  run: |
	    echo "Running PromptFoo evals for scenario: ${{ inputs.scenario }}"

	    # Run evals with the change results file.
	    # The exit status is captured instead of letting the shell's errexit
	    # abort the script, so the failure digest below is still written when
	    # the eval command reports failures.
	    # NOTE(review): confirm `--vars` accepts inline key=value pairs in the
	    # installed promptfoo version; its CLI also has a separate `--var` flag.
	    EVAL_STATUS=0
	    npx promptfoo eval \
	      --vars "results_file=../change-results.json,scenario=${{ inputs.scenario }}" \
	      --filter-description "${{ inputs.scenario }}" \
	      --output eval-results.json \
	      --no-cache || EVAL_STATUS=$?

	    # Extract summary metrics
	    # NOTE(review): assumes eval-results.json has a top-level `results`
	    # array of objects with `success` - verify against the promptfoo
	    # output format in use.
	    if [ -f eval-results.json ]; then
	      PASS_COUNT=$(jq '[.results[].success] | map(select(. == true)) | length' eval-results.json)
	      TOTAL_COUNT=$(jq '[.results[].success] | length' eval-results.json)

	      {
	        echo ""
	        echo "### PromptFoo Quality Eval"
	        echo "- Passed: $PASS_COUNT / $TOTAL_COUNT assertions"

	        # Show any failures
	        jq -r '.results[] | select(.success == false) | "- ❌ \(.description): \(.error // "assertion failed")"' eval-results.json || true
	      } >> "$GITHUB_STEP_SUMMARY"
	    fi

	    # Report the eval command's own result as the step result.
	    exit "$EVAL_STATUS"

	# Stores eval-results.json as an artifact whenever the eval step actually
	# ran; a missing file is not an error.
	- name: Upload PromptFoo results
	  if: ${{ steps.promptfoo-eval.outcome != 'skipped' }}
	  uses: actions/upload-artifact@v4
	  with:
	    name: promptfoo-results-${{ inputs.scenario }}-${{ github.run_id }}
	    path: ${{ env.WORKING_DIR }}/evals/eval-results.json
	    if-no-files-found: ignore

	# Cost Analysis disabled for scale testing (plan too large)
	# - name: Cost Analysis
	#   uses: overmindtech/cost-signals-action@v1
	#   continue-on-error: true
	#   if: ${{ inputs.action == 'plan' || inputs.action == 'apply' }}
	#   with:
	#     overmind-api-key: ${{ secrets.OVM_API_KEY }}
	#     infracost-api-key: ${{ secrets.INFRACOST_API_KEY }}
	#     terraform-plan-json: ${{ env.WORKING_DIR }}/tfplan.json
	#     ticket-link: ${{ steps.submit-plan.outputs.change-url }}

	# Marks the change as started in Overmind just before applying.
	# Best-effort: a failure here does not block the apply.
	- name: Start Overmind Change
	  if: ${{ inputs.action == 'apply' }}
	  continue-on-error: true
	  uses: overmindtech/actions/start-change@main
	  with:
	    ovm-api-key: ${{ secrets.OVM_API_KEY }}

	# Applies the plan file saved by the plan step, so exactly the changes that
	# were planned and submitted for analysis are the ones applied.
	- name: Terraform Apply
	  id: apply
	  if: ${{ inputs.action == 'apply' }}
	  run: |
	    echo "Applying scale test infrastructure (×${{ inputs.scale_multiplier }})"
	    terraform apply \
	      -auto-approve \
	      -no-color \
	      -input=false \
	      -lock-timeout=5m \
	      tfplan

	    {
	      echo "## Apply Complete"
	      echo "Scale test infrastructure deployed with multiplier ×${{ inputs.scale_multiplier }}"
	    } >> "$GITHUB_STEP_SUMMARY"

	# Closes the Overmind change for every 'apply' run, whether the preceding
	# steps succeeded, failed or were cancelled. always() is equivalent to the
	# previous success() || failure() || cancelled().
	- name: End Overmind Change
	  if: ${{ always() && inputs.action == 'apply' }}
	  continue-on-error: true
	  uses: overmindtech/actions/end-change@main
	  with:
	    ovm-api-key: ${{ secrets.OVM_API_KEY }}

	# Tears down the stack without prompting. Only reached for 'destroy', and
	# only after the validate job accepted the confirmation phrase.
	- name: Terraform Destroy
	  id: destroy
	  if: ${{ inputs.action == 'destroy' }}
	  run: |
	    echo "::warning::Destroying scale test infrastructure (×${{ inputs.scale_multiplier }})"
	    terraform destroy \
	      -auto-approve \
	      -no-color \
	      -input=false \
	      -lock-timeout=10m

	    {
	      echo "## Destroy Complete"
	      echo "Scale test infrastructure destroyed"
	    } >> "$GITHUB_STEP_SUMMARY"

	# Always prints a short recap of the run parameters and the job status to
	# the log, including after failures.
	- name: Output Terraform Summary
	  if: always()
	  run: |
	    echo ""
	    echo "=== Scale Test Summary ==="
	    echo "Action: ${{ inputs.action }}"
	    echo "Multiplier: ${{ inputs.scale_multiplier }}"
	    echo "Scenario: ${{ inputs.scenario }}"
	    echo "Status: ${{ job.status }}"

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Scale Test Infrastructure #92

Workflow file

Scale Test Infrastructure #92

Uh oh!

Workflow file for this run