Flagsmith
diff --git a/‎.github/workflows/.reusable-docker-e2e-tests.yml‎
Lines changed: 21 additions & 12 deletions b/‎.github/workflows/.reusable-docker-e2e-tests.yml‎
Lines changed: 21 additions & 12 deletions
diff --git a/‎frontend/.gitignore‎
Lines changed: 4 additions & 1 deletion b/‎frontend/.gitignore‎
Lines changed: 4 additions & 1 deletion
diff --git a/‎frontend/Makefile‎
Lines changed: 6 additions & 1 deletion b/‎frontend/Makefile‎
Lines changed: 6 additions & 1 deletion
diff --git a/‎frontend/README.md‎
Lines changed: 26 additions & 0 deletions b/‎frontend/README.md‎
Lines changed: 26 additions & 0 deletions
diff --git a/‎frontend/e2e/compare-visual-regression.ts‎
Lines changed: 74 additions & 0 deletions b/‎frontend/e2e/compare-visual-regression.ts‎
Lines changed: 74 additions & 0 deletions
diff --git a/‎frontend/e2e/helpers/visual-regression.ts‎
Lines changed: 140 additions & 0 deletions b/‎frontend/e2e/helpers/visual-regression.ts‎
Lines changed: 140 additions & 0 deletions
diff --git a/‎frontend/e2e/tests/environment-test.pw.ts‎
Lines changed: 2 additions & 0 deletions b/‎frontend/e2e/tests/environment-test.pw.ts‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎frontend/e2e/tests/flag-tests.pw.ts‎
Lines changed: 2 additions & 0 deletions b/‎frontend/e2e/tests/flag-tests.pw.ts‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎frontend/e2e/tests/segment-test.pw.ts‎
Lines changed: 1 addition & 0 deletions b/‎frontend/e2e/tests/segment-test.pw.ts‎
Lines changed: 1 addition & 0 deletions
@@ -110,12 +110,13 @@ jobs:
         working-directory: frontend
         run: make test
         env:
-          opts: ${{ inputs.args }}${{ inputs.visual-regression-update && ' --update-snapshots' || '' }}
+          opts: ${{ inputs.args }}
           API_IMAGE: ${{ inputs.api-image }}
           E2E_IMAGE: ${{ inputs.e2e-image }}
           E2E_CONCURRENCY: ${{ inputs.concurrency }}
           E2E_RETRIES: 2
           VISUAL_REGRESSION: ${{ inputs.visual-regression && '1' || '' }}
+          VISUAL_REGRESSION_ARGS: ${{ inputs.visual-regression-update && '--update-snapshots' || '' }}
           SLACK_TOKEN: ${{ secrets.SLACK_TOKEN }}
           GITHUB_ACTION_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
         timeout-minutes: 20
@@ -195,35 +196,43 @@ jobs:
           append: true
           message: ${{ steps.report-summary-success.outputs.summary || steps.report-summary-failure.outputs.summary }}
 
-      # Visual regression: upload baselines (for main) or report (for PRs)
-      - name: Upload visual regression baselines
-        if: always() && inputs.visual-regression
+      # Visual regression: after all E2E retries, run comparison and upload results
+      - name: Upload visual regression baselines (main branch)
+        if: always() && inputs.visual-regression-update
         uses: actions/upload-artifact@v4
         with:
           name: visual-regression-baselines
-          path: frontend/e2e/visual-regression-snapshots/
+          path: frontend/e2e/visual-regression-screenshots/
           retention-days: 90
           overwrite: true
 
+      - name: Upload visual regression report
+        if: always() && inputs.visual-regression && !inputs.visual-regression-update
+        uses: actions/upload-artifact@v4
+        with:
+          name: visual-regression-report-${{ github.run_id }}-${{ strategy.job-index }}
+          path: frontend/e2e/visual-regression-report/
+          retention-days: 30
+
       - name: Generate visual regression summary
-        if: always() && inputs.visual-regression && github.event_name == 'pull_request'
+        if: always() && inputs.visual-regression && !inputs.visual-regression-update && github.event_name == 'pull_request'
         id: visual-regression-summary
         shell: bash
         run: |
           if [ "${{ steps.download-baseline.outcome }}" != "success" ]; then
             echo "message=No baseline found — first run. Baselines will be generated after merge to main." >> $GITHUB_OUTPUT
           else
-            DIFF_COUNT=$(find frontend/e2e/test-results -name "*-diff.png" 2>/dev/null | wc -l | tr -d ' ')
-            BASELINE_COUNT=$(find frontend/e2e/visual-regression-snapshots -name "*.png" 2>/dev/null | wc -l | tr -d ' ')
-            if [ "$DIFF_COUNT" = "0" ]; then
-              echo "message=No visual changes detected ($BASELINE_COUNT screenshots matched)." >> $GITHUB_OUTPUT
+            SCREENSHOT_COUNT=$(find frontend/e2e/visual-regression-screenshots -name "*.png" 2>/dev/null | wc -l | tr -d ' ')
+            REPORT_EXISTS=$(test -d frontend/e2e/visual-regression-report && echo "true" || echo "false")
+            if [ "$REPORT_EXISTS" = "true" ]; then
+              echo "message=$SCREENSHOT_COUNT screenshots compared. See report for details." >> $GITHUB_OUTPUT
             else
-              echo "message=$DIFF_COUNT visual change(s) detected. See HTML report for diff images." >> $GITHUB_OUTPUT
+              echo "message=$SCREENSHOT_COUNT screenshots captured but comparison did not run." >> $GITHUB_OUTPUT
             fi
           fi
 
       - name: Comment PR with visual regression results
-        if: always() && inputs.visual-regression && github.event_name == 'pull_request' && steps.visual-regression-summary.outputs.message
+        if: always() && inputs.visual-regression && !inputs.visual-regression-update && github.event_name == 'pull_request' && steps.visual-regression-summary.outputs.message
         uses: marocchino/sticky-pull-request-comment@v2
         with:
           header: visual-regression-results
 
@@ -37,6 +37,9 @@ e2e/test-results/
 *storybook.log
 storybook-static
 
-# Visual regression snapshots (stored as CI artifacts, not in git)
+# Visual regression (baselines stored as CI artifacts, not in git)
 e2e/visual-regression-snapshots/
 e2e/visual-regression-screenshots/
+e2e/visual-regression-report/
+e2e/tests/_visual-regression-compare.pw.ts
+e2e/tests/_visual-regression-compare.pw.ts-snapshots/
@@ -31,11 +31,16 @@ serve:
 test:
 	@echo "Running E2E tests..."
 	@docker compose run --name e2e-test-run frontend \
-		sh -c 'npx cross-env E2E_CONCURRENCY=${E2E_CONCURRENCY} E2E_RETRIES=${E2E_RETRIES} npm run test -- $(opts)' \
+		sh -c 'npx cross-env E2E_CONCURRENCY=${E2E_CONCURRENCY} E2E_RETRIES=${E2E_RETRIES} npm run test -- $(opts); \
+		EXIT=$$?; \
+		if [ "$${VISUAL_REGRESSION}" = "1" ]; then npm run test:visual:compare -- $${VISUAL_REGRESSION_ARGS} || true; fi; \
+		exit $$EXIT' \
 		|| TEST_FAILED=1; \
 	echo "Copying test results from container..."; \
 	docker cp e2e-test-run:/srv/flagsmith/e2e/test-results ./e2e/test-results 2>/dev/null || echo "No test results to copy"; \
 	docker cp e2e-test-run:/srv/flagsmith/e2e/playwright-report ./e2e/playwright-report 2>/dev/null || echo "No HTML report to copy"; \
+	docker cp e2e-test-run:/srv/flagsmith/e2e/visual-regression-screenshots ./e2e/visual-regression-screenshots 2>/dev/null || echo "No visual regression screenshots to copy"; \
+	docker cp e2e-test-run:/srv/flagsmith/e2e/visual-regression-report ./e2e/visual-regression-report 2>/dev/null || echo "No visual regression report to copy"; \
 	docker rm e2e-test-run 2>/dev/null || true; \
 	if [ "$$TEST_FAILED" = "1" ]; then \
 		echo "\n=== API logs ===" && docker compose logs flagsmith-api && \
 
@@ -145,6 +145,32 @@ E2E_RETRIES=0 SKIP_BUNDLE=1 E2E_CONCURRENCY=1 npm run test -- tests/flag-tests.p
     - `trace.zip` - Interactive trace viewer
     - Screenshots and videos
 
+#### Visual Regression
+
+Visual regression screenshots are captured during E2E tests via `visualSnapshot()` calls, which are a no-op unless `VISUAL_REGRESSION=1` is set. Comparison runs as a separate step after all E2E retries complete, so flaky tests don't affect the report.
+
+```bash
+# 1. Run E2E tests with screenshot capture (with retries)
+VISUAL_REGRESSION=1 npm run test
+
+# 2a. Generate/update baselines from captured screenshots
+npm run test:visual:compare -- --update-snapshots
+
+# 2b. Compare screenshots against baselines (generates Playwright report with diffs)
+npm run test:visual:compare
+
+# 3. Open the report
+npm run test:visual:report
+```
+
+Visual diffs never fail CI — they are reported via PR comment and the Playwright HTML report.
+
+Screenshots are saved to `e2e/visual-regression-screenshots/`, baselines to `e2e/visual-regression-snapshots/` (both git-ignored). In CI, the main branch uploads screenshots as baseline artifacts, and PRs download them for comparison.
+
+| Variable | Description |
+|----------|-------------|
+| `VISUAL_REGRESSION=1` | Enable screenshot capture during E2E tests |
+
 #### Claude Code Commands
 
 When using Claude Code, these commands are available for e2e testing:
 
@@ -0,0 +1,74 @@
+import * as fs from 'fs'
+import * as path from 'path'
+
+const BASELINES_DIR = path.resolve(__dirname, 'visual-regression-snapshots')
+const SCREENSHOTS_DIR = path.resolve(__dirname, 'visual-regression-screenshots')
+const COMPARE_TEST_FILE = path.resolve(__dirname, 'tests', '_visual-regression-compare.pw.ts')
+
+/**
+ * Generates a Playwright test file that compares each captured screenshot
+ * against its baseline using toMatchSnapshot(). Run this AFTER E2E tests
+ * complete to get a Playwright HTML report with diff viewer.
+ *
+ * Screenshots and baselines use the same flat naming convention:
+ *   {testFileName}--{snapshotName}.png (dots replaced with dashes)
+ *   e.g. flag-tests-pw-ts--features-list.png
+ *
+ * Exits with status 0 without generating anything when the screenshots
+ * directory is missing or contains no PNG files.
+ */
+
+/**
+ * Turns a flat screenshot file name into a readable test title, e.g.
+ * `flag-tests-pw-ts--features-list.png` -> `flag-tests.pw.ts / features list`.
+ *
+ * Only the trailing `-pw-ts` of the test-file part is turned back into
+ * `.pw.ts`. The flat name cannot tell original dashes from original dots, and
+ * converting every dash to a dot mangled names such as `flag-tests`.
+ * Names without a `--` separator are returned with only the `.png`
+ * extension removed.
+ *
+ * @param png Screenshot file name including the `.png` extension
+ * @returns Title used for the generated Playwright test
+ */
+function toLabel(png: string): string {
+  return png
+    .replace(/\.png$/, '')
+    .replace(/^(.+?)--(.+)$/, (_match, testFile: string, name: string) => {
+      const restored = testFile.replace(/-pw-ts$/, '.pw.ts')
+      return `${restored} / ${name.replace(/-/g, ' ')}`
+    })
+}
+
+if (!fs.existsSync(SCREENSHOTS_DIR)) {
+  console.log('No screenshots found — run E2E tests with VISUAL_REGRESSION=1 first.')
+  process.exit(0)
+}
+
+// Collect screenshots; sorted so the generated file (and the report order)
+// does not depend on the platform's directory listing order.
+const screenshots = fs
+  .readdirSync(SCREENSHOTS_DIR)
+  .filter((f) => f.endsWith('.png'))
+  .sort()
+
+if (screenshots.length === 0) {
+  console.log('No screenshots to compare.')
+  process.exit(0)
+}
+
+// `recursive: true` makes this a no-op when the directory already exists.
+fs.mkdirSync(BASELINES_DIR, { recursive: true })
+
+// Build test entries from all screenshots
+const pairs: { file: string; label: string }[] = screenshots.map((file) => ({
+  file,
+  label: toLabel(file),
+}))
+
+// Generate Playwright test file. Every interpolated value goes through
+// JSON.stringify so quotes and backslashes in names or paths cannot break
+// the generated source.
+const testCases = pairs
+  .map(({ file, label }) => {
+    const screenshotPath = path.join(SCREENSHOTS_DIR, file)
+    return `
+  test(${JSON.stringify(label)}, async () => {
+    const screenshot = fs.readFileSync(${JSON.stringify(screenshotPath)})
+    expect(screenshot).toMatchSnapshot(${JSON.stringify(file)}, {
+      maxDiffPixels: 300,
+      threshold: 0.02,
+    })
+  })`
+  })
+  .join('\n')
+
+const testContent = `// Auto-generated by compare-visual-regression.ts — do not edit
+import { test, expect } from '@playwright/test'
+import * as fs from 'fs'
+
+test.describe('Visual Regression', () => {
+${testCases}
+})
+`
+
+fs.writeFileSync(COMPARE_TEST_FILE, testContent)
+console.log(`Generated ${pairs.length} comparison tests → ${COMPARE_TEST_FILE}`)
@@ -0,0 +1,140 @@
+import { expect, Page, TestInfo } from '@playwright/test'
+import * as fs from 'fs'
+import * as path from 'path'
+
+/**
+ * CSS injected before every visual snapshot to hide dynamic content
+ * that changes between runs. Animations and the text caret are already
+ * handled by the options passed to page.screenshot() in visualSnapshot()
+ * (animations: 'disabled', caret: 'hide'), so we only target app-specific
+ * volatile elements here.
+ */
+const STABILISING_CSS = `
+  /* Hide environment select (contains dynamic API key) */
+  #environment-select {
+    visibility: hidden !important;
+  }
+
+  /* Hide timestamps and relative dates */
+  .ago,
+  time,
+  [data-test*="timestamp"],
+  [data-test*="ago"],
+  .text-muted:has(> .ago),
+  .relative-date {
+    visibility: hidden !important;
+  }
+
+  /* Hide loading spinners */
+  .spinner,
+  .loader,
+  [class*="spinner"],
+  [class*="loader"] {
+    display: none !important;
+  }
+
+  /* Hide any live chat / support widgets */
+  .intercom-launcher,
+  #intercom-container,
+  .drift-widget,
+  [class*="chatbot"],
+  iframe[title*="chat"],
+  iframe[title*="Chat"] {
+    display: none !important;
+  }
+
+  /* Stabilise scrollbars across platforms */
+  ::-webkit-scrollbar {
+    display: none !important;
+  }
+  * {
+    scrollbar-width: none !important;
+  }
+`
+
+/**
+ * Directory where screenshots are captured during E2E runs.
+ * Resolved as `<cwd>/e2e/visual-regression-screenshots`, i.e. relative to the
+ * working directory the tests are launched from.
+ */
+const SCREENSHOTS_DIR = path.resolve(process.cwd(), 'e2e', 'visual-regression-screenshots')
+
+/**
+ * Directory where baselines live (downloaded from main in CI).
+ * NOTE(review): not referenced anywhere else in this module — confirm
+ * whether it is still needed here.
+ */
+const BASELINES_DIR = path.resolve(process.cwd(), 'e2e', 'visual-regression-snapshots')
+
+/**
+ * Reports whether visual regression capture is switched on for this run,
+ * i.e. whether the VISUAL_REGRESSION environment variable is exactly '1'.
+ */
+export function isVisualRegressionEnabled(): boolean {
+  const { VISUAL_REGRESSION: flag } = process.env
+  return flag === '1'
+}
+
+/**
+ * Stabilise the page ahead of a screenshot: inject the stabilising CSS,
+ * give pending images a chance to load, let two animation frames pass,
+ * then pause briefly.
+ */
+async function preparePage(page: Page): Promise<void> {
+  await page.addStyleTag({ content: STABILISING_CSS })
+
+  // Evaluated in the browser: settles once every still-loading image has
+  // fired `load` or `error`, or 5s have elapsed for it.
+  const waitForPendingImages = () => {
+    const pending = Array.from(document.images).filter((image) => !image.complete)
+    return Promise.all(
+      pending.map(
+        (image) =>
+          new Promise((done) => {
+            image.addEventListener('load', done)
+            image.addEventListener('error', done)
+            setTimeout(done, 5000)
+          }),
+      ),
+    )
+  }
+  try {
+    await page.evaluate(waitForPendingImages)
+  } catch {
+    // Best effort: a failure while waiting for images must not abort the snapshot.
+  }
+
+  // Evaluated in the browser: double rAF to ensure paint is complete.
+  await page.evaluate(
+    () =>
+      new Promise((painted) => {
+        requestAnimationFrame(() => {
+          requestAnimationFrame(() => {
+            painted(undefined)
+          })
+        })
+      }),
+  )
+
+  // Short pause so any last layout shifts can finish.
+  await page.waitForTimeout(500)
+}
+
+/**
+ * Capture a full-page screenshot during an E2E test and write it to the
+ * screenshots directory. Does nothing unless visual regression is enabled.
+ *
+ * Capture only: no comparison against baselines happens here. Comparison is
+ * a separate step run after all E2E retries have completed
+ * (`e2e/compare-visual-regression.ts`).
+ *
+ * The file is named `{testFileName}--{name}.png` with every dot in
+ * `{testFileName}--{name}` replaced by a dash.
+ *
+ * @param page     Playwright page
+ * @param name     Descriptive snapshot name, e.g. "features-list"
+ * @param testInfo Playwright testInfo, used to resolve the test file name
+ */
+export async function visualSnapshot(
+  page: Page,
+  name: string,
+  testInfo: TestInfo,
+): Promise<void> {
+  if (isVisualRegressionEnabled()) {
+    await preparePage(page)
+
+    // Flat, dot-free base name shared by screenshots and baselines.
+    const specFile = path.basename(testInfo.file)
+    const flatName = [specFile, name].join('--').split('.').join('-')
+    fs.mkdirSync(SCREENSHOTS_DIR, { recursive: true })
+
+    await page.screenshot({
+      animations: 'disabled',
+      caret: 'hide',
+      fullPage: true,
+      path: path.join(SCREENSHOTS_DIR, flatName + '.png'),
+      scale: 'css',
+    })
+  }
+}
@@ -11,6 +11,7 @@ test.describe('Environment Tests', () => {
       login,
       setText,
       waitForElementVisible,
+      waitForToastsToClear,
     } = createHelpers(page);
 
     log('Login')
@@ -20,6 +21,7 @@ test.describe('Environment Tests', () => {
     log('Create environment')
     await click('#create-env-link')
     await createEnvironment('Staging')
+    await waitForToastsToClear()
     await visualSnapshot(page, 'environment-created', testInfo)
 
     log('Edit Environment')
 
@@ -19,6 +19,7 @@ test.describe('Flag Tests', () => {
       waitForElementClickable,
       waitForElementVisible,
       waitForFeatureSwitch,
+      waitForToastsToClear,
     } = createHelpers(page);
 
     log('Login')
@@ -51,6 +52,7 @@ test.describe('Flag Tests', () => {
       { value: 'small', weight: 0 },
     ]})
 
+    await waitForToastsToClear()
     await visualSnapshot(page, 'features-list', testInfo)
 
     log('Create Short Life Feature')
 
@@ -114,6 +114,7 @@ test('Segment test 1 - Create, update, and manage segments with multivariate fla
   await assertInputValue(byId(`rule-${0}-value-0`), `${lastRule.value + 1}`)
   await deleteSegmentFromPage('segment_to_update')
 
+  await waitForToastsToClear()
   await visualSnapshot(page, 'segments-list', testInfo)
 
   log('Create segment')