Skip to content

Commit fccd6db

Browse files
kyle-ssgtalissoncostaclaude
authored
feat: visual regression e2e (#7102)
Co-authored-by: Talisson <talisson.odcosta@gmail.com> Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 589c5d5 commit fccd6db

23 files changed

Lines changed: 442 additions & 21 deletions

.github/workflows/.reusable-docker-e2e-tests.yml

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,16 @@ on:
2727
description: The runner label to use. Defaults to `depot-ubuntu-latest`
2828
required: false
2929
default: depot-ubuntu-latest
30+
visual-regression:
31+
type: boolean
32+
description: Enable visual regression screenshot comparison
33+
required: false
34+
default: false
35+
visual-regression-update:
36+
type: boolean
37+
description: Update visual regression baselines (use on main branch)
38+
required: false
39+
default: false
3040
secrets:
3141
GCR_TOKEN:
3242
description: A token to use for logging into Github Container Registry. If not provided, login does not occur.
@@ -78,6 +88,24 @@ jobs:
7888
- name: Login to Depot Registry
7989
run: depot pull-token | docker login -u x-token --password-stdin registry.depot.dev
8090

91+
- name: Prepare visual regression snapshots directory
92+
if: inputs.visual-regression
93+
working-directory: frontend
94+
run: mkdir -p e2e/visual-regression-snapshots
95+
96+
- name: Download visual regression baselines
97+
if: inputs.visual-regression
98+
id: download-baseline
99+
continue-on-error: true
100+
uses: dawidd6/action-download-artifact@v6
101+
with:
102+
github_token: ${{ secrets.GCR_TOKEN }}
103+
workflow: platform-docker-build-test-publish.yml
104+
branch: main
105+
name: visual-regression-baselines
106+
path: frontend/e2e/visual-regression-snapshots/
107+
if_no_artifact_found: warn
108+
81109
- name: Run tests on dockerised frontend
82110
working-directory: frontend
83111
run: make test
@@ -87,6 +115,8 @@ jobs:
87115
E2E_IMAGE: ${{ inputs.e2e-image }}
88116
E2E_CONCURRENCY: ${{ inputs.concurrency }}
89117
E2E_RETRIES: 2
118+
VISUAL_REGRESSION: ${{ inputs.visual-regression && '1' || '' }}
119+
VISUAL_REGRESSION_ARGS: ${{ inputs.visual-regression-update && '--update-snapshots' || '' }}
90120
SLACK_TOKEN: ${{ secrets.SLACK_TOKEN }}
91121
GITHUB_ACTION_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
92122
timeout-minutes: 20
@@ -165,3 +195,48 @@ jobs:
165195
header: playwright-e2e-results
166196
append: true
167197
message: ${{ steps.report-summary-success.outputs.summary || steps.report-summary-failure.outputs.summary }}
198+
199+
# Visual regression: the comparison itself runs inside `make test` after all E2E retries; the steps below only upload and report its results
200+
- name: Upload visual regression baselines (main branch)
201+
if: always() && inputs.visual-regression-update
202+
uses: actions/upload-artifact@v4
203+
with:
204+
name: visual-regression-baselines
205+
path: frontend/e2e/visual-regression-screenshots/
206+
retention-days: 90
207+
overwrite: true
208+
209+
- name: Upload visual regression report
210+
if: always() && inputs.visual-regression && !inputs.visual-regression-update
211+
uses: actions/upload-artifact@v4
212+
with:
213+
name: visual-regression-report-${{ github.run_id }}-${{ strategy.job-index }}
214+
path: frontend/e2e/visual-regression-report/
215+
retention-days: 30
216+
217+
- name: Generate visual regression summary
218+
if: always() && inputs.visual-regression && !inputs.visual-regression-update && github.event_name == 'pull_request'
219+
id: visual-regression-summary
220+
shell: bash
221+
run: |
222+
if [ "${{ steps.download-baseline.outcome }}" != "success" ]; then
223+
echo "message=No baseline found — first run. Baselines will be generated after merge to main." >> $GITHUB_OUTPUT
224+
else
225+
SCREENSHOT_COUNT=$(find frontend/e2e/visual-regression-screenshots -name "*.png" 2>/dev/null | wc -l | tr -d ' ')
226+
REPORT_EXISTS=$(test -d frontend/e2e/visual-regression-report && echo "true" || echo "false")
227+
if [ "$REPORT_EXISTS" = "true" ]; then
228+
echo "message=$SCREENSHOT_COUNT screenshots compared. See report for details." >> $GITHUB_OUTPUT
229+
else
230+
echo "message=$SCREENSHOT_COUNT screenshots captured but comparison did not run." >> $GITHUB_OUTPUT
231+
fi
232+
fi
233+
234+
- name: Comment PR with visual regression results
235+
if: always() && inputs.visual-regression && !inputs.visual-regression-update && github.event_name == 'pull_request' && steps.visual-regression-summary.outputs.message
236+
uses: marocchino/sticky-pull-request-comment@v2
237+
with:
238+
header: visual-regression-results
239+
message: |
240+
## Visual Regression
241+
${{ steps.visual-regression-summary.outputs.message }}
242+
[View full report](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}#artifacts)

.github/workflows/platform-docker-build-test-publish.yml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,10 @@ jobs:
8585
e2e-image: ${{ needs.docker-build-e2e.outputs.image }}
8686
api-image: ${{ matrix.args.api-image }}
8787
args: ${{ matrix.args.args }}
88+
# Run visual regression on the enterprise E2E job (which runs all OSS + enterprise tests)
89+
# on a single architecture only, and update baselines since this is the main branch.
90+
visual-regression: ${{ matrix.runs-on == 'depot-ubuntu-latest-16' && contains(matrix.args.args, '@enterprise') }}
91+
visual-regression-update: ${{ matrix.runs-on == 'depot-ubuntu-latest-16' && contains(matrix.args.args, '@enterprise') }}
8892
secrets:
8993
GCR_TOKEN: ${{ secrets.GITHUB_TOKEN }}
9094
SLACK_TOKEN: ${{ secrets.SLACK_TOKEN }}

.github/workflows/platform-pull-request.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -168,6 +168,7 @@ jobs:
168168
e2e-image: ${{ needs.docker-build-e2e.outputs.image }}
169169
api-image: ${{ needs.docker-build-private-cloud.outputs.image }}
170170
args: --grep "@oss|@enterprise"
171+
visual-regression: ${{ matrix.runs-on == 'depot-ubuntu-latest-16' }}
171172
secrets:
172173
GCR_TOKEN: ${{ needs.permissions-check.outputs.can-write == 'true' && secrets.GITHUB_TOKEN || '' }}
173174
SLACK_TOKEN: ${{ needs.permissions-check.outputs.can-write == 'true' && secrets.SLACK_TOKEN || '' }}

frontend/.gitignore

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,3 +36,10 @@ e2e/test-results/
3636

3737
*storybook.log
3838
storybook-static
39+
40+
# Visual regression (baselines stored as CI artifacts, not in git)
41+
e2e/visual-regression-snapshots/
42+
e2e/visual-regression-screenshots/
43+
e2e/visual-regression-report/
44+
e2e/tests/_visual-regression-compare.pw.ts
45+
e2e/tests/_visual-regression-compare.pw.ts-snapshots/

frontend/Makefile

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,11 +31,16 @@ serve:
3131
test:
3232
@echo "Running E2E tests..."
3333
@docker compose run --name e2e-test-run frontend \
34-
sh -c 'npx cross-env E2E_CONCURRENCY=${E2E_CONCURRENCY} E2E_RETRIES=${E2E_RETRIES} npm run test -- $(opts)' \
34+
sh -c 'npx cross-env E2E_CONCURRENCY=${E2E_CONCURRENCY} E2E_RETRIES=${E2E_RETRIES} npm run test -- $(opts); \
35+
EXIT=$$?; \
36+
if [ "$${VISUAL_REGRESSION}" = "1" ]; then npm run test:visual:compare -- $${VISUAL_REGRESSION_ARGS} || true; fi; \
37+
exit $$EXIT' \
3538
|| TEST_FAILED=1; \
3639
echo "Copying test results from container..."; \
3740
docker cp e2e-test-run:/srv/flagsmith/e2e/test-results ./e2e/test-results 2>/dev/null || echo "No test results to copy"; \
3841
docker cp e2e-test-run:/srv/flagsmith/e2e/playwright-report ./e2e/playwright-report 2>/dev/null || echo "No HTML report to copy"; \
42+
docker cp e2e-test-run:/srv/flagsmith/e2e/visual-regression-screenshots ./e2e/visual-regression-screenshots 2>/dev/null || echo "No visual regression screenshots to copy"; \
43+
docker cp e2e-test-run:/srv/flagsmith/e2e/visual-regression-report ./e2e/visual-regression-report 2>/dev/null || echo "No visual regression report to copy"; \
3944
docker rm e2e-test-run 2>/dev/null || true; \
4045
if [ "$$TEST_FAILED" = "1" ]; then \
4146
echo "\n=== API logs ===" && docker compose logs flagsmith-api && \
@@ -50,3 +55,10 @@ test-oss:
5055
.PHONY: test-enterprise
5156
test-enterprise:
5257
@$(MAKE) test opts="--grep @enterprise"
58+
59+
# Visual regression: run E2E tests with screenshot comparison enabled.
60+
# Snapshots are shared via volume mount in docker-compose-e2e-tests.yml.
61+
.PHONY: test-visual
62+
test-visual:
63+
@mkdir -p e2e/visual-regression-snapshots
64+
@VISUAL_REGRESSION=1 $(MAKE) test opts="$(opts)"

frontend/README.md

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -145,6 +145,32 @@ E2E_RETRIES=0 SKIP_BUNDLE=1 E2E_CONCURRENCY=1 npm run test -- tests/flag-tests.p
145145
- `trace.zip` - Interactive trace viewer
146146
- Screenshots and videos
147147

148+
#### Visual Regression
149+
150+
Visual regression screenshots are captured during E2E tests via `visualSnapshot()` calls. These calls are a no-op unless `VISUAL_REGRESSION=1` is set. Comparison runs as a separate step after all E2E retries complete, so flaky tests don't affect the report.
151+
152+
```bash
153+
# 1. Run E2E tests with screenshot capture (with retries)
154+
VISUAL_REGRESSION=1 npm run test
155+
156+
# 2a. Generate/update baselines from captured screenshots
157+
npm run test:visual:compare -- --update-snapshots
158+
159+
# 2b. Compare screenshots against baselines (generates Playwright report with diffs)
160+
npm run test:visual:compare
161+
162+
# 3. Open the report
163+
npm run test:visual:report
164+
```
165+
166+
Visual diffs never fail CI — they are reported via PR comment and the Playwright HTML report.
167+
168+
Screenshots are saved to `e2e/visual-regression-screenshots/`, baselines to `e2e/visual-regression-snapshots/` (both git-ignored). In CI, the main branch uploads screenshots as baseline artifacts, and PRs download them for comparison.
169+
170+
| Variable | Description |
171+
|----------|-------------|
172+
| `VISUAL_REGRESSION=1` | Enable screenshot capture during E2E tests |
173+
148174
#### Claude Code Commands
149175

150176
When using Claude Code, these commands are available for e2e testing:

frontend/docker-compose-e2e-tests.yml

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,12 +49,14 @@ services:
4949
FLAGSMITH_API_URL: http://flagsmith-api:8000/api/v1/
5050
SLACK_TOKEN: ${SLACK_TOKEN}
5151
GITHUB_ACTION_URL: ${GITHUB_ACTION_URL}
52+
VISUAL_REGRESSION: ${VISUAL_REGRESSION:-}
5253
ports:
5354
- 3000:3000
5455
depends_on:
5556
flagsmith-api:
5657
condition: service_healthy
57-
58+
volumes:
59+
- ./e2e/visual-regression-snapshots:/srv/flagsmith/e2e/visual-regression-snapshots
5860
links:
5961
- flagsmith-api:flagsmith-api
6062
command: [npm, run, test]
Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
import * as fs from 'fs'
2+
import * as path from 'path'
3+
4+
const BASELINES_DIR = path.resolve(__dirname, 'visual-regression-snapshots')
const SCREENSHOTS_DIR = path.resolve(__dirname, 'visual-regression-screenshots')
const COMPARE_TEST_FILE = path.resolve(__dirname, 'tests', '_visual-regression-compare.pw.ts')

/**
 * Generates a Playwright test file that compares each captured screenshot
 * against its baseline using toMatchSnapshot(). Run this AFTER E2E tests
 * complete to get a Playwright HTML report with diff viewer.
 *
 * Screenshots and baselines use the same flat naming convention:
 *   {testFileName}--{snapshotName}.png  (dots replaced with dashes)
 *   e.g. flag-tests-pw-ts--features-list.png
 */

/**
 * Builds the human-readable test title for a screenshot file name.
 *
 * `flag-tests-pw-ts--features-list.png` becomes
 * `flag-tests.pw.ts / features list`. Only the trailing `-pw-ts` of the test
 * file part is turned back into `.pw.ts`; other dashes are kept because the
 * flattened name cannot tell an original dash from an original dot.
 * Names without a `--` separator are returned with the extension removed.
 *
 * @param png Screenshot file name (not a path).
 * @returns The label used as the generated test's title.
 */
function screenshotLabel(png: string): string {
  // Strip only the extension, never an earlier ".png" inside the name.
  const base = png.endsWith('.png') ? png.slice(0, -'.png'.length) : png
  const match = /^(.+?)--(.+)$/.exec(base)
  if (!match) {
    return base
  }
  const [, testFile, name] = match
  const restored = testFile.replace(/-pw-ts$/, '.pw.ts')
  return `${restored} / ${name.replace(/-/g, ' ')}`
}

/**
 * Renders the source of one generated Playwright test for a screenshot.
 *
 * Every interpolated value goes through JSON.stringify so that quotes and
 * backslashes in file names or paths cannot produce invalid test source.
 *
 * @param file Screenshot file name; also used as the snapshot name.
 * @param screenshotsDir Directory holding the captured screenshots.
 * @returns Source text of a single `test(...)` call.
 */
function renderTestCase(file: string, screenshotsDir: string = SCREENSHOTS_DIR): string {
  const screenshotPath = path.join(screenshotsDir, file)
  return `
  test(${JSON.stringify(screenshotLabel(file))}, async () => {
    const screenshot = fs.readFileSync(${JSON.stringify(screenshotPath)})
    expect(screenshot).toMatchSnapshot(${JSON.stringify(file)}, {
      maxDiffPixels: 300,
      threshold: 0.02,
    })
  })`
}

/**
 * Script entry point: scans the screenshots directory and writes the
 * comparison test file. Returns early (leaving the process to exit with
 * status 0) when there is nothing to compare.
 */
function generateCompareTests(): void {
  if (!fs.existsSync(SCREENSHOTS_DIR)) {
    console.log('No screenshots found — run E2E tests with VISUAL_REGRESSION=1 first.')
    return
  }

  // Sort so the generated file is stable regardless of directory listing order.
  const screenshots = fs
    .readdirSync(SCREENSHOTS_DIR)
    .filter((f) => f.endsWith('.png'))
    .sort()

  if (screenshots.length === 0) {
    console.log('No screenshots to compare.')
    return
  }

  // `recursive: true` makes this a no-op when the directory already exists.
  fs.mkdirSync(BASELINES_DIR, { recursive: true })

  const testCases = screenshots.map((file) => renderTestCase(file)).join('\n')

  const testContent = `// Auto-generated by compare-visual-regression.ts — do not edit
import { test, expect } from '@playwright/test'
import * as fs from 'fs'

test.describe('Visual Regression', () => {
${testCases}
})
`

  fs.writeFileSync(COMPARE_TEST_FILE, testContent)
  console.log(`Generated ${screenshots.length} comparison tests → ${COMPARE_TEST_FILE}`)
}

generateCompareTests()

frontend/e2e/helpers/index.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
11
export * from './utils.playwright';
22
export * from './browser-logging.playwright';
33
export * from './e2e-helpers.playwright';
4+
export * from './visual-regression';

0 commit comments

Comments
 (0)