Skip to content

Commit 344e2c8

Browse files
committed
ci: add GitHub Actions workflows for CI, eval, and release
1 parent 2cf7828 commit 344e2c8

4 files changed

Lines changed: 404 additions & 0 deletions

File tree

.github/TEMPLATE.md

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
# Template Repository Configuration
2+
3+
This repository is configured as a GitHub template repository.
4+
5+
## Usage
6+
7+
1. Navigate to the repository on GitHub
8+
2. Click "Use this template" → "Create a new repository"
9+
3. Enter repository name and description
10+
4. Click "Create repository from template"
11+
12+
## Template Files
13+
14+
The following files are template-specific:
15+
16+
- `.github/TEMPLATE.md` — This file
17+
- `CONTRIBUTING.md` — Contribution guidelines
18+
- `CHANGELOG.md` — Version history
19+
20+
## Post-Creation Steps
21+
22+
After creating a repository from this template:
23+
24+
1. Update `package.json` with new repository name
25+
2. Update `README.md` with project-specific details
26+
3. Add the repository secrets referenced by the CI/CD workflows (for example `SNYK_TOKEN` and `ANTHROPIC_API_KEY`)
27+
4. Set up infrastructure with your cloud provider
28+
5. Update license year and copyright holder
29+
30+
## Included Templates
31+
32+
- GitHub Actions workflows (CI, release, evaluation)
33+
- Docker configuration
34+
- Terraform infrastructure
35+
- TypeScript project structure
36+
- Comprehensive documentation
37+
38+
## License
39+
40+
This template is released under the MIT License.

.github/workflows/ci.yml

Lines changed: 140 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,140 @@
1+
# CI workflow: triggered by pushes to `main` and by pull requests targeting `main`.
name: CI

on:
  push:
    branches:
      - main
  pull_request:
    branches:
      - main

jobs:
10+
lint:
  # Runs the `lint` and `format:check` npm scripts on a fresh install.
  runs-on: ubuntu-latest
  steps:
    - uses: actions/checkout@v4

    # Node 22 with the npm cache keyed on the lockfile.
    - name: Setup Node.js
      uses: actions/setup-node@v4
      with:
        node-version: "22"
        cache: npm
        cache-dependency-path: package-lock.json

    - name: Install dependencies
      run: npm ci

    - name: Run ESLint
      run: npm run lint

    - name: Check formatting
      run: npm run format:check
30+
31+
typecheck:
  # Runs the `typecheck` npm script on a fresh install.
  runs-on: ubuntu-latest
  steps:
    - uses: actions/checkout@v4

    # Node 22 with the npm cache keyed on the lockfile.
    - name: Setup Node.js
      uses: actions/setup-node@v4
      with:
        node-version: "22"
        cache: npm
        cache-dependency-path: package-lock.json

    - name: Install dependencies
      run: npm ci

    - name: Run TypeScript compiler
      run: npm run typecheck
48+
49+
test:
  # Runs unit tests (with coverage) and integration tests, then uploads the
  # lcov report to Codecov.
  runs-on: ubuntu-latest
  steps:
    - uses: actions/checkout@v4

    # Node 22 with the npm cache keyed on the lockfile.
    - name: Setup Node.js
      uses: actions/setup-node@v4
      with:
        node-version: "22"
        cache: npm
        cache-dependency-path: package-lock.json

    - name: Install dependencies
      run: npm ci

    # Everything after `--` is forwarded to the underlying test:unit script.
    - name: Run unit tests
      run: npm run test:unit -- --coverage

    - name: Run integration tests
      run: npm run test:integration

    # Upload problems are non-fatal (fail_ci_if_error: false).
    # NOTE(review): codecov-action v3 is an older major version — confirm
    # whether an upgrade (and a CODECOV_TOKEN secret) is wanted.
    - name: Upload coverage to Codecov
      uses: codecov/codecov-action@v3
      with:
        files: ./coverage/lcov.info
        fail_ci_if_error: false
75+
76+
build:
  # Runs the `build` npm script and checks that the expected artifacts exist.
  runs-on: ubuntu-latest
  steps:
    - uses: actions/checkout@v4

    # Node 22 with the npm cache keyed on the lockfile.
    - name: Setup Node.js
      uses: actions/setup-node@v4
      with:
        node-version: "22"
        cache: npm
        cache-dependency-path: package-lock.json

    - name: Install dependencies
      run: npm ci

    - name: Build
      run: npm run build

    # Each `test` exits non-zero when its path is missing, which fails the step.
    - name: Verify build output
      run: |
        test -d dist/
        test -f dist/index.js
        test -f dist/cli.js
99+
100+
docker:
  # Builds the Docker image (without pushing it) once lint, typecheck, test
  # and build have all succeeded.
  runs-on: ubuntu-latest
  needs: [lint, typecheck, test, build]
  steps:
    - uses: actions/checkout@v4

    - name: Set up Docker Buildx
      uses: docker/setup-buildx-action@v3

    - name: Build Docker image
      uses: docker/build-push-action@v5
      with:
        context: .
        push: false
        tags: agent-eval-harness:${{ github.sha }}
        # Use the GitHub Actions cache backend for layer caching. The previous
        # settings could never produce a cache hit: `cache-from` pointed at a
        # placeholder registry ref (`user/agent-eval-harness:buildcache`), and
        # `cache-to: type=inline` only embeds cache metadata in an image that
        # is never pushed here.
        cache-from: type=gha
        cache-to: type=gha,mode=max
117+
118+
security:
  # Dependency vulnerability checks: `npm audit` (blocking) followed by a
  # Snyk scan (non-blocking).
  runs-on: ubuntu-latest
  steps:
    - uses: actions/checkout@v4

    # Node 22 with the npm cache keyed on the lockfile.
    - name: Setup Node.js
      uses: actions/setup-node@v4
      with:
        node-version: "22"
        cache: npm
        cache-dependency-path: package-lock.json

    - name: Install dependencies
      run: npm ci

    - name: Run npm audit
      run: npm audit --audit-level=moderate

    # continue-on-error keeps a Snyk failure from failing the job.
    # NOTE(review): `@master` is a mutable ref — consider pinning to a release
    # tag or commit SHA once the desired version is confirmed.
    - name: Run Snyk to check for vulnerabilities
      uses: snyk/actions/node@master
      continue-on-error: true
      env:
        SNYK_TOKEN: ${{ secrets.SNYK_TOKEN }}

.github/workflows/eval.yml

Lines changed: 146 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,146 @@
1+
# Evaluation workflow: triggered by pull requests targeting `main` and by
# pushes to `main`.
name: Agent Evaluation

on:
  pull_request:
    branches:
      - main
  push:
    branches:
      - main

jobs:
10+
evaluate:
  # Builds the project, runs the evaluation suite, compares against the
  # baseline artifact from the base branch (pull requests only), enforces the
  # gates, uploads the results and posts/updates a summary comment on the PR.
  runs-on: ubuntu-latest
  # Explicit token scopes so the job also works where the default
  # GITHUB_TOKEN is read-only.
  permissions:
    contents: read        # checkout
    actions: read         # look up and download the baseline artifact
    pull-requests: write  # create/update the summary comment
  steps:
    - uses: actions/checkout@v4

    - name: Setup Node.js
      uses: actions/setup-node@v4
      with:
        node-version: '22'
        cache: 'npm'
        cache-dependency-path: package-lock.json

    - name: Install dependencies
      run: npm ci

    - name: Build
      run: npm run build

    # Best effort: a missing baseline must not fail the job, so the compare
    # step below is skipped when nothing was downloaded.
    # NOTE(review): v2 of this action is an old major version — confirm
    # whether a newer major should be used (check its changelog and security
    # advisories before bumping).
    - name: Download baseline results
      if: github.event_name == 'pull_request'
      uses: dawidd6/action-download-artifact@v2
      with:
        workflow: eval.yml
        branch: ${{ github.base_ref }}
        name: eval-results
        path: baseline/
      continue-on-error: true

    - name: Run evaluation suite
      env:
        ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
      run: |
        mkdir -p results
        npx agent-eval-harness eval \
          trajectories/examples/*.jsonl \
          --config eval-config.yaml \
          --output results/

    # Only runs on pull requests for which baseline files were downloaded.
    - name: Run regression gates
      if: github.event_name == 'pull_request' && hashFiles('baseline/') != ''
      run: |
        npx agent-eval-harness compare \
          baseline/results.json \
          results/results.json \
          --format markdown \
          --output results/comparison.md

    - name: Check gates
      run: |
        npx agent-eval-harness gate \
          results/results.json \
          --preset standard \
          --exit-code

    # always(): keep the results even when an earlier step failed.
    - name: Upload evaluation results
      if: always()
      uses: actions/upload-artifact@v4
      with:
        name: eval-results
        path: results/
        retention-days: 30

    # always(): report on the PR even when the gates failed.
    - name: Comment on PR
      if: github.event_name == 'pull_request' && always()
      uses: actions/github-script@v7
      with:
        script: |
          const fs = require('fs');

          const RESULTS_PATH = 'results/results.json';
          const COMPARISON_PATH = 'results/comparison.md';
          // Also used to recognise the comment left by a previous run.
          const HEADING = 'Agent Evaluation Results';

          let comment = `## ${HEADING}\n\n`;

          try {
            const results = JSON.parse(fs.readFileSync(RESULTS_PATH, 'utf8'));

            // Build the summary separately so that a failure halfway through
            // never leaves a half-written summary in the comment.
            let summary = '';
            summary += `**Overall Score:** ${(results.overallMetrics.overallScore * 100).toFixed(1)}%\n`;
            summary += `**Pass Rate:** ${results.summary.passRate.toFixed(1)}%\n`;
            summary += `**Trajectories:** ${results.trajectory_count}\n\n`;

            if (results.gates) {
              summary += `**Gates:** ${results.gates.overallPassed ? '✅ Passed' : '❌ Failed'}\n\n`;

              if (!results.gates.overallPassed) {
                summary += '### Failed Gates\n\n';
                for (const gate of results.gates.results) {
                  if (!gate.passed) {
                    summary += `- **${gate.gate_name}:** ${gate.message || 'Failed'}\n`;
                  }
                }
                summary += '\n';
              }
            }

            if (fs.existsSync(COMPARISON_PATH)) {
              summary += '### Comparison with Baseline\n\n';
              summary += fs.readFileSync(COMPARISON_PATH, 'utf8');
            }

            comment += summary;
          } catch (error) {
            // Surface the cause in the job log instead of swallowing it.
            core.warning(`Could not build the evaluation summary from ${RESULTS_PATH}: ${error.message}`);
            comment += '⚠️ Evaluation results could not be parsed.\n';
          }

          comment += '\n---\n*Generated by agent-eval-harness*';

          // Walk every page of comments: a single listComments call returns
          // only the first page, so on busy PRs the earlier bot comment would
          // be missed and a duplicate posted.
          const existingComments = await github.paginate(github.rest.issues.listComments, {
            issue_number: context.issue.number,
            owner: context.repo.owner,
            repo: context.repo.repo,
            per_page: 100,
          });

          const previous = existingComments.find(
            (candidate) =>
              candidate.user?.type === 'Bot' &&
              candidate.body?.includes(HEADING)
          );

          if (previous) {
            await github.rest.issues.updateComment({
              comment_id: previous.id,
              owner: context.repo.owner,
              repo: context.repo.repo,
              body: comment,
            });
          } else {
            await github.rest.issues.createComment({
              issue_number: context.issue.number,
              owner: context.repo.owner,
              repo: context.repo.repo,
              body: comment,
            });
          }

    # Runs only when an earlier step has failed; prints a pointer to the
    # results before exiting non-zero.
    - name: Fail if gates failed
      if: failure()
      run: |
        echo "Evaluation gates failed. Please review the results above."
        exit 1

0 commit comments

Comments
 (0)