Flagsmith
diff --git a/‎.github/workflows/.reusable-docker-e2e-tests.yml‎
Lines changed: 75 additions & 0 deletions b/‎.github/workflows/.reusable-docker-e2e-tests.yml‎
Lines changed: 75 additions & 0 deletions
diff --git a/‎.github/workflows/platform-docker-build-test-publish.yml‎
Lines changed: 4 additions & 0 deletions b/‎.github/workflows/platform-docker-build-test-publish.yml‎
Lines changed: 4 additions & 0 deletions
diff --git a/‎.github/workflows/platform-pull-request.yml‎
Lines changed: 1 addition & 0 deletions b/‎.github/workflows/platform-pull-request.yml‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎frontend/.gitignore‎
Lines changed: 7 additions & 0 deletions b/‎frontend/.gitignore‎
Lines changed: 7 additions & 0 deletions
diff --git a/‎frontend/Makefile‎
Lines changed: 13 additions & 1 deletion b/‎frontend/Makefile‎
Lines changed: 13 additions & 1 deletion
diff --git a/‎frontend/README.md‎
Lines changed: 26 additions & 0 deletions b/‎frontend/README.md‎
Lines changed: 26 additions & 0 deletions
diff --git a/‎frontend/docker-compose-e2e-tests.yml‎
Lines changed: 3 additions & 1 deletion b/‎frontend/docker-compose-e2e-tests.yml‎
Lines changed: 3 additions & 1 deletion
diff --git a/‎frontend/e2e/compare-visual-regression.ts‎
Lines changed: 74 additions & 0 deletions b/‎frontend/e2e/compare-visual-regression.ts‎
Lines changed: 74 additions & 0 deletions
diff --git a/‎frontend/e2e/helpers/index.ts‎
Lines changed: 1 addition & 0 deletions b/‎frontend/e2e/helpers/index.ts‎
Lines changed: 1 addition & 0 deletions
@@ -27,6 +27,16 @@ on:
         description: The runner label to use. Defaults to `depot-ubuntu-latest`
         required: false
         default: depot-ubuntu-latest
+      visual-regression:
+        type: boolean
+        description: Enable visual regression screenshot comparison
+        required: false
+        default: false
+      visual-regression-update:
+        type: boolean
+        description: Update visual regression baselines (use on main branch)
+        required: false
+        default: false
     secrets:
       GCR_TOKEN:
         description: A token to use for logging into Github Container Registry. If not provided, login does not occur.
@@ -78,6 +88,24 @@ jobs:
       - name: Login to Depot Registry
         run: depot pull-token | docker login -u x-token --password-stdin registry.depot.dev
 
+      - name: Prepare visual regression snapshots directory
+        if: inputs.visual-regression
+        working-directory: frontend
+        run: mkdir -p e2e/visual-regression-snapshots
+
+      - name: Download visual regression baselines
+        if: inputs.visual-regression
+        id: download-baseline
+        continue-on-error: true
+        uses: dawidd6/action-download-artifact@v6
+        with:
+          github_token: ${{ secrets.GCR_TOKEN }}
+          workflow: platform-docker-build-test-publish.yml
+          branch: main
+          name: visual-regression-baselines
+          path: frontend/e2e/visual-regression-snapshots/
+          if_no_artifact_found: warn
+
       - name: Run tests on dockerised frontend
         working-directory: frontend
         run: make test
@@ -87,6 +115,8 @@ jobs:
           E2E_IMAGE: ${{ inputs.e2e-image }}
           E2E_CONCURRENCY: ${{ inputs.concurrency }}
           E2E_RETRIES: 2
+          VISUAL_REGRESSION: ${{ inputs.visual-regression && '1' || '' }}
+          VISUAL_REGRESSION_ARGS: ${{ inputs.visual-regression-update && '--update-snapshots' || '' }}
           SLACK_TOKEN: ${{ secrets.SLACK_TOKEN }}
           GITHUB_ACTION_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
         timeout-minutes: 20
@@ -165,3 +195,48 @@ jobs:
           header: playwright-e2e-results
           append: true
           message: ${{ steps.report-summary-success.outputs.summary || steps.report-summary-failure.outputs.summary }}
+
+      # Visual regression: the comparison itself runs inside `make test`, after all E2E retries; the steps below upload and report its results
+      - name: Upload visual regression baselines (main branch)
+        if: always() && inputs.visual-regression-update
+        uses: actions/upload-artifact@v4
+        with:
+          name: visual-regression-baselines
+          path: frontend/e2e/visual-regression-screenshots/
+          retention-days: 90
+          overwrite: true
+
+      - name: Upload visual regression report
+        if: always() && inputs.visual-regression && !inputs.visual-regression-update
+        uses: actions/upload-artifact@v4
+        with:
+          name: visual-regression-report-${{ github.run_id }}-${{ strategy.job-index }}
+          path: frontend/e2e/visual-regression-report/
+          retention-days: 30
+
+      - name: Generate visual regression summary
+        if: always() && inputs.visual-regression && !inputs.visual-regression-update && github.event_name == 'pull_request'
+        id: visual-regression-summary
+        shell: bash
+        run: |
+          if [ "${{ steps.download-baseline.outcome }}" != "success" ]; then
+            echo "message=No baseline found — first run. Baselines will be generated after merge to main." >> $GITHUB_OUTPUT
+          else
+            SCREENSHOT_COUNT=$(find frontend/e2e/visual-regression-screenshots -name "*.png" 2>/dev/null | wc -l | tr -d ' ')
+            REPORT_EXISTS=$(test -d frontend/e2e/visual-regression-report && echo "true" || echo "false")
+            if [ "$REPORT_EXISTS" = "true" ]; then
+              echo "message=$SCREENSHOT_COUNT screenshots compared. See report for details." >> $GITHUB_OUTPUT
+            else
+              echo "message=$SCREENSHOT_COUNT screenshots captured but comparison did not run." >> $GITHUB_OUTPUT
+            fi
+          fi
+
+      - name: Comment PR with visual regression results
+        if: always() && inputs.visual-regression && !inputs.visual-regression-update && github.event_name == 'pull_request' && steps.visual-regression-summary.outputs.message
+        uses: marocchino/sticky-pull-request-comment@v2
+        with:
+          header: visual-regression-results
+          message: |
+            ## Visual Regression
+            ${{ steps.visual-regression-summary.outputs.message }}
+            [View full report](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}#artifacts)
@@ -85,6 +85,10 @@ jobs:
       e2e-image: ${{ needs.docker-build-e2e.outputs.image }}
       api-image: ${{ matrix.args.api-image }}
       args: ${{ matrix.args.args }}
+      # Run visual regression on the enterprise E2E job (which runs all OSS + enterprise tests)
+      # on a single architecture only, and update baselines since this is the main branch.
+      visual-regression: ${{ matrix.runs-on == 'depot-ubuntu-latest-16' && contains(matrix.args.args, '@enterprise') }}
+      visual-regression-update: ${{ matrix.runs-on == 'depot-ubuntu-latest-16' && contains(matrix.args.args, '@enterprise') }}
     secrets:
       GCR_TOKEN: ${{ secrets.GITHUB_TOKEN }}
       SLACK_TOKEN: ${{ secrets.SLACK_TOKEN }}
 
@@ -168,6 +168,7 @@ jobs:
       e2e-image: ${{ needs.docker-build-e2e.outputs.image }}
       api-image: ${{ needs.docker-build-private-cloud.outputs.image }}
       args: --grep "@oss|@enterprise"
+      visual-regression: ${{ matrix.runs-on == 'depot-ubuntu-latest-16' }}
     secrets:
       GCR_TOKEN: ${{ needs.permissions-check.outputs.can-write == 'true' && secrets.GITHUB_TOKEN || '' }}
       SLACK_TOKEN: ${{ needs.permissions-check.outputs.can-write == 'true' && secrets.SLACK_TOKEN || '' }}
 
@@ -36,3 +36,10 @@ e2e/test-results/
 
 *storybook.log
 storybook-static
+
+# Visual regression (baselines stored as CI artifacts, not in git)
+e2e/visual-regression-snapshots/
+e2e/visual-regression-screenshots/
+e2e/visual-regression-report/
+e2e/tests/_visual-regression-compare.pw.ts
+e2e/tests/_visual-regression-compare.pw.ts-snapshots/
@@ -31,11 +31,16 @@ serve:
 test:
 	@echo "Running E2E tests..."
 	@docker compose run --name e2e-test-run frontend \
-		sh -c 'npx cross-env E2E_CONCURRENCY=${E2E_CONCURRENCY} E2E_RETRIES=${E2E_RETRIES} npm run test -- $(opts)' \
+		sh -c 'npx cross-env E2E_CONCURRENCY=${E2E_CONCURRENCY} E2E_RETRIES=${E2E_RETRIES} npm run test -- $(opts); \
+		EXIT=$$?; \
+		if [ "$${VISUAL_REGRESSION}" = "1" ]; then npm run test:visual:compare -- ${VISUAL_REGRESSION_ARGS} || true; fi; \
+		exit $$EXIT' \
 		|| TEST_FAILED=1; \
 	echo "Copying test results from container..."; \
 	docker cp e2e-test-run:/srv/flagsmith/e2e/test-results ./e2e/test-results 2>/dev/null || echo "No test results to copy"; \
 	docker cp e2e-test-run:/srv/flagsmith/e2e/playwright-report ./e2e/playwright-report 2>/dev/null || echo "No HTML report to copy"; \
+	docker cp e2e-test-run:/srv/flagsmith/e2e/visual-regression-screenshots ./e2e/visual-regression-screenshots 2>/dev/null || echo "No visual regression screenshots to copy"; \
+	docker cp e2e-test-run:/srv/flagsmith/e2e/visual-regression-report ./e2e/visual-regression-report 2>/dev/null || echo "No visual regression report to copy"; \
 	docker rm e2e-test-run 2>/dev/null || true; \
 	if [ "$$TEST_FAILED" = "1" ]; then \
 		echo "\n=== API logs ===" && docker compose logs flagsmith-api && \
@@ -50,3 +55,10 @@ test-oss:
 .PHONY: test-enterprise
 test-enterprise:
 	@$(MAKE) test opts="--grep @enterprise"
+
+# Visual regression: run E2E tests with screenshot comparison enabled.
+# Snapshots are shared via volume mount in docker-compose-e2e-tests.yml.
+.PHONY: test-visual
+test-visual:
+	@mkdir -p e2e/visual-regression-snapshots
+	@VISUAL_REGRESSION=1 $(MAKE) test opts="$(opts)"
@@ -145,6 +145,32 @@ E2E_RETRIES=0 SKIP_BUNDLE=1 E2E_CONCURRENCY=1 npm run test -- tests/flag-tests.p
     - `trace.zip` - Interactive trace viewer
     - Screenshots and videos
 
+#### Visual Regression
+
+Visual regression screenshots are captured during E2E tests via `visualSnapshot()` calls, which are no-ops unless `VISUAL_REGRESSION=1` is set. The comparison runs as a separate step after all E2E retries complete, so flaky tests don't affect the report.
+
+```bash
+# 1. Run E2E tests with screenshot capture (with retries)
+VISUAL_REGRESSION=1 npm run test
+
+# 2a. Generate/update baselines from captured screenshots
+npm run test:visual:compare -- --update-snapshots
+
+# 2b. Compare screenshots against baselines (generates Playwright report with diffs)
+npm run test:visual:compare
+
+# 3. Open the report
+npm run test:visual:report
+```
+
+Visual diffs never fail CI — they are reported via PR comment and the Playwright HTML report.
+
+Screenshots are saved to `e2e/visual-regression-screenshots/`, baselines to `e2e/visual-regression-snapshots/` (both git-ignored). In CI, the main branch uploads screenshots as baseline artifacts, and PRs download them for comparison.
+
+| Variable | Description |
+|----------|-------------|
+| `VISUAL_REGRESSION=1` | Enable screenshot capture during E2E tests |
+
 #### Claude Code Commands
 
 When using Claude Code, these commands are available for e2e testing:
 
@@ -49,12 +49,14 @@ services:
       FLAGSMITH_API_URL: http://flagsmith-api:8000/api/v1/
       SLACK_TOKEN: ${SLACK_TOKEN}
       GITHUB_ACTION_URL: ${GITHUB_ACTION_URL}
+      VISUAL_REGRESSION: ${VISUAL_REGRESSION:-}
     ports:
       - 3000:3000
     depends_on:
       flagsmith-api:
         condition: service_healthy
-
+    volumes:
+      - ./e2e/visual-regression-snapshots:/srv/flagsmith/e2e/visual-regression-snapshots
     links:
       - flagsmith-api:flagsmith-api
     command: [npm, run, test]
@@ -0,0 +1,74 @@
+import * as fs from 'fs'
+import * as path from 'path'
+
+const BASELINES_DIR = path.resolve(__dirname, 'visual-regression-snapshots')
+const SCREENSHOTS_DIR = path.resolve(__dirname, 'visual-regression-screenshots')
+const COMPARE_TEST_FILE = path.resolve(__dirname, 'tests', '_visual-regression-compare.pw.ts')
+const PNG_EXT = '.png'
+
+/**
+ * Generates a Playwright test file that compares each captured screenshot
+ * against its baseline using toMatchSnapshot(). Run this AFTER E2E tests
+ * complete to get a Playwright HTML report with diff viewer.
+ *
+ * Screenshots and baselines use the same flat naming convention:
+ *   {testFileName}--{snapshotName}.png (dots replaced with dashes)
+ *   e.g. flag-tests-pw-ts--features-list.png
+ */
+
+/**
+ * Builds a human-readable test title from a screenshot file name, e.g.
+ * `flag-tests-pw-ts--features-list.png` -> `flag-tests.pw.ts / features list`.
+ * Names without the `--` separator are returned unchanged, minus the extension.
+ */
+function toLabel(png: string): string {
+  // Strip the extension as a suffix; the caller guarantees the name ends with it.
+  const stem = png.slice(0, -PNG_EXT.length)
+  return stem.replace(
+    /^(.+?)--(.+)$/,
+    (_match: string, testFile: string, name: string) => {
+      // Only the trailing `-pw-ts` is restored to `.pw.ts`: every other dash may be
+      // a genuine dash from the test file name, so it must not be turned into a dot.
+      const restored = testFile.replace(/-pw-ts$/, '.pw.ts')
+      return `${restored} / ${name.replace(/-/g, ' ')}`
+    },
+  )
+}
+
+if (!fs.existsSync(SCREENSHOTS_DIR)) {
+  console.log('No screenshots found — run E2E tests with VISUAL_REGRESSION=1 first.')
+  process.exit(0)
+}
+
+// Collect screenshots; sorted so the generated file is stable across platforms.
+const screenshots = fs
+  .readdirSync(SCREENSHOTS_DIR)
+  .filter((f) => f.endsWith(PNG_EXT))
+  .sort()
+
+if (screenshots.length === 0) {
+  console.log('No screenshots to compare.')
+  process.exit(0)
+}
+
+if (!fs.existsSync(BASELINES_DIR)) {
+  fs.mkdirSync(BASELINES_DIR, { recursive: true })
+}
+
+// Build one test entry per screenshot
+const pairs = screenshots.map((file) => ({ file, label: toLabel(file) }))
+
+// Generate Playwright test file. Every interpolated value goes through
+// JSON.stringify so quotes or backslashes in a name cannot break the generated code.
+const testCases = pairs
+  .map(({ file, label }) => {
+    const screenshotPath = path.join(SCREENSHOTS_DIR, file)
+    return `
+  test(${JSON.stringify(label)}, async () => {
+    const screenshot = fs.readFileSync(${JSON.stringify(screenshotPath)})
+    expect(screenshot).toMatchSnapshot(${JSON.stringify(file)}, {
+      maxDiffPixels: 300,
+      threshold: 0.02,
+    })
+  })`
+  })
+  .join('\n')
+
+const testContent = `// Auto-generated by compare-visual-regression.ts — do not edit
+import { test, expect } from '@playwright/test'
+import * as fs from 'fs'
+
+test.describe('Visual Regression', () => {
+${testCases}
+})
+`
+
+fs.writeFileSync(COMPARE_TEST_FILE, testContent)
+console.log(`Generated ${pairs.length} comparison tests → ${COMPARE_TEST_FILE}`)
@@ -1,3 +1,4 @@
 export * from './utils.playwright';
 export * from './browser-logging.playwright';
 export * from './e2e-helpers.playwright';
+export * from './visual-regression';