ci: add GitHub Actions workflows for CI, eval, and release

reaatech · reaatech · commit 344e2c8edc29 · 2026-04-19T07:38:49.000-07:00
diff --git a/.github/TEMPLATE.md b/.github/TEMPLATE.md
@@ -0,0 +1,40 @@
+# Template Repository Configuration
+
+This repository is configured as a GitHub template repository.
+
+## Usage
+
+1. Navigate to the repository on GitHub
+2. Click "Use this template" → "Create a new repository"
+3. Enter repository name and description
+4. Click "Create repository from template"
+
+## Template Files
+
+The following files are template-specific:
+
+- `.github/TEMPLATE.md` — This file
+- `CONTRIBUTING.md` — Contribution guidelines
+- `CHANGELOG.md` — Version history
+
+## Post-Creation Steps
+
+After creating a repository from this template:
+
+1. Update `package.json` with new repository name
+2. Update `README.md` with project-specific details
+3. Add the repository secrets that the CI/CD workflows read (`ANTHROPIC_API_KEY` for evaluation, `SNYK_TOKEN` for the security scan)
+4. Set up infrastructure with your cloud provider
+5. Update license year and copyright holder
+
+## Included Templates
+
+- GitHub Actions workflows (CI, release, evaluation)
+- Docker configuration
+- Terraform infrastructure
+- TypeScript project structure
+- Comprehensive documentation
+
+## License
+
+This template is released under the MIT License.
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -0,0 +1,140 @@
+# Runs the lint, typecheck, test, build, docker, and security jobs for every
+# push to main and every pull request that targets main.
+name: CI
+
+on:
+  push:
+    branches: [main]
+  pull_request:
+    branches: [main]
+
+# Least privilege: the jobs in this workflow only check out and read the
+# repository, so the GITHUB_TOKEN is restricted to read-only contents access
+# instead of inheriting the repository's default token permissions.
+permissions:
+  contents: read
+
+jobs:
+  # Runs the project's `lint` and `format:check` npm scripts.
+  lint:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+
+      # Node 22 with the npm cache keyed on package-lock.json.
+      - name: Setup Node.js
+        uses: actions/setup-node@v4
+        with:
+          cache: npm
+          cache-dependency-path: package-lock.json
+          node-version: "22"
+
+      - name: Install dependencies
+        run: npm ci
+
+      - name: Run ESLint
+        run: npm run lint
+
+      - name: Check formatting
+        run: npm run format:check
+
+  # Runs the project's `typecheck` npm script.
+  typecheck:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+
+      # Node 22 with the npm cache keyed on package-lock.json.
+      - name: Setup Node.js
+        uses: actions/setup-node@v4
+        with:
+          cache: npm
+          cache-dependency-path: package-lock.json
+          node-version: "22"
+
+      - name: Install dependencies
+        run: npm ci
+
+      - name: Run TypeScript compiler
+        run: npm run typecheck
+
+  # Runs unit tests (with coverage) and integration tests, then uploads the
+  # lcov report to Codecov.
+  test:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+
+      # Node 22 with the npm cache keyed on package-lock.json.
+      - name: Setup Node.js
+        uses: actions/setup-node@v4
+        with:
+          cache: npm
+          cache-dependency-path: package-lock.json
+          node-version: "22"
+
+      - name: Install dependencies
+        run: npm ci
+
+      # Everything after `--` is forwarded to the `test:unit` script.
+      - name: Run unit tests
+        run: npm run test:unit -- --coverage
+
+      - name: Run integration tests
+        run: npm run test:integration
+
+      # Only reached when both test steps pass. An upload error does not fail
+      # the job (`fail_ci_if_error: false`).
+      # NOTE(review): v3 of this action is an old major version; check whether
+      # a newer major (which may require a Codecov token) should be used.
+      - name: Upload coverage to Codecov
+        uses: codecov/codecov-action@v3
+        with:
+          fail_ci_if_error: false
+          files: ./coverage/lcov.info
+
+  # Runs the `build` npm script and checks that the expected entry points
+  # were emitted into dist/.
+  build:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+
+      # Node 22 with the npm cache keyed on package-lock.json.
+      - name: Setup Node.js
+        uses: actions/setup-node@v4
+        with:
+          cache: npm
+          cache-dependency-path: package-lock.json
+          node-version: "22"
+
+      - name: Install dependencies
+        run: npm ci
+
+      - name: Build
+        run: npm run build
+
+      # Each `test` exits non-zero, failing the step, when its path is missing.
+      - name: Verify build output
+        run: |
+          test -d dist/
+          test -f dist/index.js
+          test -f dist/cli.js
+
+  # Builds the Docker image (without pushing it) once lint, typecheck, test,
+  # and build have all succeeded.
+  docker:
+    runs-on: ubuntu-latest
+    needs: [lint, typecheck, test, build]
+    steps:
+      - uses: actions/checkout@v4
+
+      # Buildx is required for the cache import/export options used below.
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+
+      # Layer cache is kept in the GitHub Actions cache. The previous setup
+      # imported from a placeholder registry ref (`user/...`) and exported an
+      # inline cache into an image that is never pushed, so no cache was ever
+      # reused between runs.
+      - name: Build Docker image
+        uses: docker/build-push-action@v5
+        with:
+          context: .
+          push: false
+          tags: agent-eval-harness:${{ github.sha }}
+          cache-from: type=gha
+          cache-to: type=gha,mode=max
+
+  # Dependency vulnerability checks: `npm audit` followed by a Snyk scan.
+  security:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+
+      # Node 22 with the npm cache keyed on package-lock.json.
+      - name: Setup Node.js
+        uses: actions/setup-node@v4
+        with:
+          cache: npm
+          cache-dependency-path: package-lock.json
+          node-version: "22"
+
+      - name: Install dependencies
+        run: npm ci
+
+      # Audit threshold is `moderate`.
+      - name: Run npm audit
+        run: npm audit --audit-level=moderate
+
+      # Advisory only: `continue-on-error` keeps a Snyk failure from failing
+      # the job.
+      # NOTE(review): `@master` is a mutable ref; consider pinning to a tag or
+      # commit SHA.
+      - name: Run Snyk to check for vulnerabilities
+        uses: snyk/actions/node@master
+        env:
+          SNYK_TOKEN: ${{ secrets.SNYK_TOKEN }}
+        continue-on-error: true
diff --git a/.github/workflows/eval.yml b/.github/workflows/eval.yml
@@ -0,0 +1,146 @@
+# Runs the agent evaluation job for pull requests that target main and for
+# pushes to main.
+name: Agent Evaluation
+
+on:
+  pull_request:
+    branches:
+      - main
+  push:
+    branches:
+      - main
+
+jobs:
+  # Builds the project, runs the evaluation suite over the example
+  # trajectories, enforces the `standard` gate preset, and uploads the results.
+  # On pull requests it also tries to fetch the `eval-results` artifact from an
+  # eval.yml run on the base branch, compares against it, and posts (or
+  # updates) a summary comment on the PR.
+  evaluate:
+    runs-on: ubuntu-latest
+    # Explicit token scopes so the job does not depend on the repository's
+    # default GITHUB_TOKEN permissions.
+    permissions:
+      contents: read        # checkout
+      actions: read         # look up and download the baseline artifact
+      pull-requests: write  # create or update the summary comment
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Setup Node.js
+        uses: actions/setup-node@v4
+        with:
+          node-version: '22'
+          cache: 'npm'
+          cache-dependency-path: package-lock.json
+
+      - name: Install dependencies
+        run: npm ci
+
+      - name: Build
+        run: npm run build
+
+      # Best effort: a missing baseline must not fail the job, hence
+      # `continue-on-error`.
+      # NOTE(review): bumped from v2; v6 is understood to stop matching
+      # artifacts produced by forks by default. Confirm against the action's
+      # release notes.
+      - name: Download baseline results
+        if: github.event_name == 'pull_request'
+        uses: dawidd6/action-download-artifact@v6
+        with:
+          workflow: eval.yml
+          branch: ${{ github.base_ref }}
+          name: eval-results
+          path: baseline/
+        continue-on-error: true
+
+      - name: Run evaluation suite
+        env:
+          ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
+        run: |
+          mkdir -p results
+          npx agent-eval-harness eval \
+            trajectories/examples/*.jsonl \
+            --config eval-config.yaml \
+            --output results/
+
+      # Gate on the exact file the compare command reads, so the step is
+      # skipped whenever the baseline download produced no results.json.
+      - name: Run regression gates
+        if: github.event_name == 'pull_request' && hashFiles('baseline/results.json') != ''
+        run: |
+          npx agent-eval-harness compare \
+            baseline/results.json \
+            results/results.json \
+            --format markdown \
+            --output results/comparison.md
+
+      # The `id` lets the final step tell a gate failure apart from any other
+      # failing step.
+      - name: Check gates
+        id: gates
+        run: |
+          npx agent-eval-harness gate \
+            results/results.json \
+            --preset standard \
+            --exit-code
+
+      # `always()` keeps the results available even when a gate failed.
+      - name: Upload evaluation results
+        if: always()
+        uses: actions/upload-artifact@v4
+        with:
+          name: eval-results
+          path: results/
+          retention-days: 30
+
+      - name: Comment on PR
+        if: github.event_name == 'pull_request' && always()
+        uses: actions/github-script@v7
+        with:
+          script: |
+            const fs = require('fs');
+
+            // Build the comment body from results/results.json when readable.
+            let comment = '## Agent Evaluation Results\n\n';
+
+            try {
+              const results = JSON.parse(fs.readFileSync('results/results.json', 'utf8'));
+
+              comment += `**Overall Score:** ${(results.overallMetrics.overallScore * 100).toFixed(1)}%\n`;
+              comment += `**Pass Rate:** ${results.summary.passRate.toFixed(1)}%\n`;
+              comment += `**Trajectories:** ${results.trajectory_count}\n\n`;
+
+              if (results.gates) {
+                comment += `**Gates:** ${results.gates.overallPassed ? '✅ Passed' : '❌ Failed'}\n\n`;
+
+                if (!results.gates.overallPassed) {
+                  comment += '### Failed Gates\n\n';
+                  for (const gate of results.gates.results) {
+                    if (!gate.passed) {
+                      comment += `- **${gate.gate_name}:** ${gate.message || 'Failed'}\n`;
+                    }
+                  }
+                  comment += '\n';
+                }
+              }
+
+              if (fs.existsSync('results/comparison.md')) {
+                comment += '### Comparison with Baseline\n\n';
+                comment += fs.readFileSync('results/comparison.md', 'utf8');
+              }
+            } catch (error) {
+              // Still post a comment, but leave the reason in the job log.
+              core.warning(`Could not read evaluation results: ${error.message}`);
+              comment += '⚠️ Evaluation results could not be parsed.\n';
+            }
+
+            comment += '\n---\n*Generated by agent-eval-harness*';
+
+            // Look through every page of comments so an existing summary is
+            // found on long PR threads instead of posting a duplicate.
+            const existingComments = await github.paginate(github.rest.issues.listComments, {
+              issue_number: context.issue.number,
+              owner: context.repo.owner,
+              repo: context.repo.repo,
+              per_page: 100,
+            });
+
+            const botComment = existingComments.find(existing =>
+              existing.user.type === 'Bot' &&
+              existing.body.includes('Agent Evaluation Results')
+            );
+
+            if (botComment) {
+              await github.rest.issues.updateComment({
+                comment_id: botComment.id,
+                owner: context.repo.owner,
+                repo: context.repo.repo,
+                body: comment
+              });
+            } else {
+              await github.rest.issues.createComment({
+                issue_number: context.issue.number,
+                owner: context.repo.owner,
+                repo: context.repo.repo,
+                body: comment
+              });
+            }
+
+      # Only print the gate message when the gate step itself failed; other
+      # failures already fail the job with their own error output.
+      - name: Fail if gates failed
+        if: failure() && steps.gates.outcome == 'failure'
+        run: |
+          echo "Evaluation gates failed. Please review the results above."
+          exit 1
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml