Skip to content

Commit 4ebcf8b

Browse files
committed
feat: Phase 4 optimizations - executable playbooks, visual regression, structured logging, parallel checks, enhanced excel reports
1 parent f87fee9 commit 4ebcf8b

14 files changed

Lines changed: 367 additions & 45 deletions

File tree

10.2 KB
Loading

agent/memory/test-history.json

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,5 +46,37 @@
4646
"date": "2026-06-08T02:53:16.083Z",
4747
"status": "Partial Pass",
4848
"reportExcel": "C:\\Users\\MOHIT KUMAR\\OneDrive\\Documents\\QaAgent\\agent\\reports\\2026-06-08-08-23-agent-report.xlsx"
49+
},
50+
{
51+
"websiteUrl": "https://example.com",
52+
"mode": "codex",
53+
"date": "2026-06-08T03:34:51.398Z",
54+
"status": "Partial Pass",
55+
"reportMarkdown": "C:\\Users\\MOHIT KUMAR\\OneDrive\\Documents\\QaAgent\\agent\\reports\\2026-06-08-09-05-agent-report.md",
56+
"reportJson": "C:\\Users\\MOHIT KUMAR\\OneDrive\\Documents\\QaAgent\\agent\\reports\\2026-06-08-09-05-agent-report.json",
57+
"reportExcel": "C:\\Users\\MOHIT KUMAR\\OneDrive\\Documents\\QaAgent\\agent\\reports\\2026-06-08-09-05-agent-report.xlsx"
58+
},
59+
{
60+
"websiteUrl": "https://example.com",
61+
"mode": "codex",
62+
"date": "2026-06-08T03:35:28.843Z",
63+
"status": "Partial Pass",
64+
"reportExcel": "C:\\Users\\MOHIT KUMAR\\OneDrive\\Documents\\QaAgent\\agent\\reports\\2026-06-08-09-06-agent-report.xlsx"
65+
},
66+
{
67+
"websiteUrl": "https://example.com",
68+
"mode": "codex",
69+
"date": "2026-06-08T03:36:17.207Z",
70+
"status": "Partial Pass",
71+
"reportMarkdown": "C:\\Users\\MOHIT KUMAR\\OneDrive\\Documents\\QaAgent\\agent\\reports\\2026-06-08-09-06-agent-report.md",
72+
"reportJson": "C:\\Users\\MOHIT KUMAR\\OneDrive\\Documents\\QaAgent\\agent\\reports\\2026-06-08-09-06-agent-report.json",
73+
"reportExcel": "C:\\Users\\MOHIT KUMAR\\OneDrive\\Documents\\QaAgent\\agent\\reports\\2026-06-08-09-06-agent-report.xlsx"
74+
},
75+
{
76+
"websiteUrl": "https://example.com",
77+
"mode": "codex",
78+
"date": "2026-06-08T03:36:50.629Z",
79+
"status": "Partial Pass",
80+
"reportExcel": "C:\\Users\\MOHIT KUMAR\\OneDrive\\Documents\\QaAgent\\agent\\reports\\2026-06-08-09-07-agent-report.xlsx"
4981
}
5082
]

agent/src/api-agent/groq-tool-loop.ts

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@ import { runConfiguredLogin } from "../browser/login-runner.js";
44
import { createRandomLeads } from "../data/lead-data.js";
55
import { buildCoverageSummary, statusWithCoverage } from "../qa/coverage.js";
66
import { runQaEngine } from "../qa/qa-engine.js";
7+
import { runPlaybookChecks } from "../qa/playbook-runner.js";
8+
import { detectVisualRegression } from "../qa/detectors/visual-detector.js";
79
import type { LeadData, QaTask, RunContext } from "../shared/types.js";
810
import { assertSafeAction } from "../shared/safety-guard.js";
911
import { sitesMemory } from "../memory/sites-memory.js";
@@ -105,7 +107,14 @@ export async function runGroqToolLoop(task: QaTask, headed: boolean, maxSteps: n
105107
}
106108

107109
const state = await browser.saveBrowserState(screenshots.at(-1));
108-
const detected = runQaEngine(task.qaProfile, state, browser.getConsoleErrors(), browser.getNetworkErrors(), task.scope);
110+
const precomputed = await runPlaybookChecks(task.qaProfile, browser, state);
111+
const detected = runQaEngine(task.qaProfile, state, browser.getConsoleErrors(), browser.getNetworkErrors(), task.scope, precomputed);
112+
113+
const latestScreenshot = screenshots.at(-1);
114+
if (latestScreenshot) {
115+
const visualRegressions = await detectVisualRegression(latestScreenshot, state.url, "final");
116+
detected.uxIssues.push(...visualRegressions);
117+
}
109118
const tracePath = await browser.saveTrace();
110119
const stepsPerformed = [...browser.recorder.all(), "Groq tool loop completed."];
111120
const coverage = buildCoverageSummary({

agent/src/codex-agent/codex-driver.ts

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@ import { runConfiguredLogin } from "../browser/login-runner.js";
44
import { createRandomLeads } from "../data/lead-data.js";
55
import { buildCoverageSummary, statusWithCoverage } from "../qa/coverage.js";
66
import { runQaEngine } from "../qa/qa-engine.js";
7+
import { runPlaybookChecks } from "../qa/playbook-runner.js";
8+
import { detectVisualRegression } from "../qa/detectors/visual-detector.js";
79
import type { WrittenReports } from "../reports/report-writer.js";
810
import { runExplicitTaskSteps } from "./codex-task-runner.js";
911
import { finalizeCodexReport } from "./codex-report-helper.js";
@@ -35,7 +37,14 @@ export async function runCodexDriver(task: QaTask, headed: boolean): Promise<{ c
3537
screenshots.push(...explorerResult.screenshots);
3638

3739
const state = await browser.saveBrowserState(screenshots.at(-1));
38-
const detected = runQaEngine(task.qaProfile, state, browser.getConsoleErrors(), browser.getNetworkErrors(), task.scope);
40+
const precomputed = await runPlaybookChecks(task.qaProfile, browser, state);
41+
const detected = runQaEngine(task.qaProfile, state, browser.getConsoleErrors(), browser.getNetworkErrors(), task.scope, precomputed);
42+
43+
const latestScreenshot = screenshots.at(-1);
44+
if (latestScreenshot) {
45+
const visualRegressions = await detectVisualRegression(latestScreenshot, state.url, "final");
46+
detected.uxIssues.push(...visualRegressions);
47+
}
3948

4049
// Merge explorer-discovered issues with detector-found issues
4150
const allBugs = [...detected.bugs, ...explorerResult.bugs];

agent/src/index.ts

Lines changed: 13 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ import { runCodexDriver } from "./codex-agent/codex-driver.js";
44
import { runGroqToolLoop } from "./api-agent/groq-tool-loop.js";
55
import { BrowserAgent } from "./browser/browser-agent.js";
66
import { runConfiguredLogin } from "./browser/login-runner.js";
7+
import { logger } from "./shared/logger.js";
78

89
async function main(): Promise<void> {
910
const options = parseCli();
@@ -19,11 +20,11 @@ async function main(): Promise<void> {
1920
const login = await runConfiguredLogin(browser, options.task, screenshots);
2021
const screenshot = screenshots.at(-1);
2122
const state = await browser.saveBrowserState(screenshot);
22-
console.log("\nBrowser state saved.");
23-
console.log("State JSON: agent/artifacts/state/latest-browser-state.json");
24-
if (screenshot) console.log(`Screenshot: ${screenshot}`);
25-
console.log(`Login: ${login.resultText}`);
26-
console.log(`Clickable elements: ${state.clickableElements.length}`);
23+
logger.info("Browser state saved.");
24+
logger.info(`State JSON: agent/artifacts/state/latest-browser-state.json`);
25+
if (screenshot) logger.info(`Screenshot: ${screenshot}`);
26+
logger.info(`Login: ${login.resultText}`);
27+
logger.info(`Clickable elements: ${state.clickableElements.length}`);
2728
} finally {
2829
await browser.close();
2930
}
@@ -33,17 +34,17 @@ async function main(): Promise<void> {
3334
? await runGroqToolLoop(options.task, options.headed, options.maxSteps, config)
3435
: await runCodexDriver(options.task, options.headed);
3536

36-
console.log("\nQA run complete.");
37-
if (result.reports.markdownPath) console.log(`Markdown report: ${result.reports.markdownPath}`);
38-
if (result.reports.jsonPath) console.log(`JSON report: ${result.reports.jsonPath}`);
39-
if (result.reports.excelPath) console.log(`Excel report: ${result.reports.excelPath}`);
40-
console.log(`Final status: ${result.context.finalStatus}`);
37+
logger.info("QA run complete.");
38+
if (result.reports.markdownPath) logger.info(`Markdown report: ${result.reports.markdownPath}`);
39+
if (result.reports.jsonPath) logger.info(`JSON report: ${result.reports.jsonPath}`);
40+
if (result.reports.excelPath) logger.info(`Excel report: ${result.reports.excelPath}`);
41+
logger.info(`Final status: ${result.context.finalStatus}`);
4142
if (result.context.finalStatus === "Fail" && result.context.bugs[0]) {
42-
console.log(`Top issue: ${result.context.bugs[0].title} - ${result.context.bugs[0].description}`);
43+
logger.warn(`Top issue: ${result.context.bugs[0].title} - ${result.context.bugs[0].description}`);
4344
}
4445
}
4546

4647
main().catch((error) => {
47-
console.error(error instanceof Error ? error.message : String(error));
48+
logger.error("Application crashed", error);
4849
process.exitCode = 1;
4950
});

agent/src/qa/detectors/visual-detector.ts

Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
import fs from "node:fs";
2+
import path from "node:path";
3+
import pixelmatch from "pixelmatch";
4+
import { PNG } from "pngjs";
5+
import { ensureDir } from "../../shared/utils.js";
6+
import type { QaIssue } from "../../shared/types.js";
7+
8+
/** Number of differing pixels above which a comparison is reported as a regression. */
const VISUAL_DIFF_PIXEL_LIMIT = 500;

/** Per-pixel colour sensitivity handed to pixelmatch (0..1, lower is stricter). */
const VISUAL_DIFF_COLOR_THRESHOLD = 0.1;

/** Renders any thrown value as a short message suitable for a diagnostic line. */
function describeVisualError(error: unknown): string {
  return error instanceof Error ? error.message : String(error);
}

/**
 * Compares a screenshot against the stored baseline for the same URL and module.
 *
 * On the first run for a given URL/module pair there is no baseline yet, so the
 * current screenshot is copied into the baselines directory and no issues are
 * reported. On later runs the two PNGs are compared pixel by pixel; a diff image
 * is written and an issue is returned when more than
 * {@link VISUAL_DIFF_PIXEL_LIMIT} pixels differ.
 *
 * The check is best-effort: filesystem or PNG decoding failures never throw to
 * the caller, but they are reported with `console.warn` so that a broken
 * comparison is not mistaken for a clean one.
 *
 * @param currentScreenshotPath Path of the PNG captured in this run. An empty or
 *   non-existent path yields an empty result.
 * @param url Page URL; normalised into the baseline file name.
 * @param moduleName Label distinguishing several baselines for one URL.
 * @returns Zero or one issue describing a size mismatch or a visual regression.
 */
export async function detectVisualRegression(
  currentScreenshotPath: string,
  url: string,
  moduleName = "default"
): Promise<QaIssue[]> {
  const issues: QaIssue[] = [];
  if (!currentScreenshotPath || !fs.existsSync(currentScreenshotPath)) {
    return issues;
  }

  // Baselines and diffs live under the working directory's agent/artifacts tree.
  // NOTE(review): ensureDir presumably creates the directory when missing - confirm in shared/utils.
  const baselineDir = path.join(process.cwd(), "agent", "artifacts", "baselines");
  ensureDir(baselineDir);

  const diffDir = path.join(process.cwd(), "agent", "artifacts", "visual-diffs");
  ensureDir(diffDir);

  // Normalise the URL to a safe file name (lower-case alphanumerics and dashes, max 100 chars).
  const safeUrl = url.replace(/[^a-z0-9]+/gi, "-").toLowerCase().slice(0, 100);
  // The module label also ends up in a path, so strip separators and other unsafe
  // characters; plain labels such as "default" or "final" pass through unchanged.
  const safeModule = moduleName.replace(/[^a-z0-9_-]+/gi, "-");
  const baselineFilename = `${safeUrl}-${safeModule}.png`;
  const baselinePath = path.join(baselineDir, baselineFilename);

  // First run for this page: adopt the current screenshot as the baseline.
  if (!fs.existsSync(baselinePath)) {
    try {
      fs.copyFileSync(currentScreenshotPath, baselinePath);
    } catch (err: unknown) {
      console.warn(`Visual baseline could not be saved to ${baselinePath}: ${describeVisualError(err)}`);
    }
    return issues;
  }

  try {
    const img1 = PNG.sync.read(fs.readFileSync(baselinePath));
    const img2 = PNG.sync.read(fs.readFileSync(currentScreenshotPath));

    const { width, height } = img1;
    // pixelmatch needs equally sized buffers, so a size mismatch is reported on its own.
    if (img2.width !== width || img2.height !== height) {
      issues.push({
        title: "Visual baseline size mismatch",
        severity: "Low",
        area: "Visual Regression",
        description: `Current screenshot size (${img2.width}x${img2.height}) does not match baseline screenshot size (${width}x${height}).`,
        suggestedFix: "Run tests under identical viewport dimensions, or update the visual baseline if the page layout has changed."
      });
      return issues;
    }

    const diff = new PNG({ width, height });
    const diffPixels = pixelmatch(img1.data, img2.data, diff.data, width, height, {
      threshold: VISUAL_DIFF_COLOR_THRESHOLD
    });

    // Small differences (anti-aliasing, cursor blink, ...) are tolerated.
    if (diffPixels > VISUAL_DIFF_PIXEL_LIMIT) {
      const diffPath = path.join(diffDir, `diff-${baselineFilename}`);
      fs.writeFileSync(diffPath, PNG.sync.write(diff));

      issues.push({
        title: "Visual regression detected",
        severity: "Medium",
        area: "Visual Regression",
        description: `Visual difference of ${diffPixels} pixels detected compared to baseline screenshot. Diff saved to visual-diffs/diff-${baselineFilename}`,
        evidence: diffPath,
        suggestedFix: "Compare the screenshot with the diff file to see layout/style regressions, and update baseline if correct."
      });
    }
  } catch (err: unknown) {
    // Keep the QA run alive, but make the skipped comparison visible.
    console.warn(`Visual comparison skipped for ${baselinePath}: ${describeVisualError(err)}`);
  }

  return issues;
}

agent/src/qa/playbook-runner.ts

Lines changed: 52 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,61 @@
11
import type { BrowserState, QaProfile } from "../shared/types.js";
22
import { playbooksForProfile } from "./playbooks/index.js";
3+
import type { BrowserAgent } from "../browser/browser-agent.js";
34

4-
export function buildQaChecklist(profile: QaProfile, state?: BrowserState): Record<string, string> {
5+
/**
 * Turns whatever a check threw into the text placed after "Error: ".
 * Anything carrying a `message` property (including Error instances) reports
 * that message; every other value, `null` and `undefined` included, is stringified.
 */
function describeCheckError(error: unknown): string {
  if (typeof error === "object" && error !== null && "message" in error) {
    return String((error as { message: unknown }).message);
  }
  return String(error);
}

/**
 * Executes the playbook checks that apply to a QA profile and collects their statuses.
 *
 * Checks that provide a `run` function are started immediately and awaited
 * together, so they execute concurrently against the same browser instance.
 * Plain string checks, and object checks without `run`, fall back to the
 * state-based `inferStatus` heuristic.
 *
 * A failing check never aborts the others: whatever it throws is recorded as an
 * `Error: ...` status for that check only.
 *
 * @param profile QA profile used to select playbooks.
 * @param browser Browser agent handed to every executable check.
 * @param state Optional captured browser state, forwarded to checks and to `inferStatus`.
 * @returns Map from `"<playbook name>: <check name>"` to a status string.
 */
export async function runPlaybookChecks(
  profile: QaProfile,
  browser: BrowserAgent,
  state?: BrowserState
): Promise<Record<string, string>> {
  const checklist: Record<string, string> = {};
  const pending: Array<Promise<{ key: string; value: string }>> = [];

  for (const playbook of playbooksForProfile(profile)) {
    for (const check of playbook.checks) {
      const checkName = typeof check === "string" ? check : check.name;
      const key = `${playbook.name}: ${checkName}`;
      if (typeof check !== "string" && check.run) {
        const runFn = check.run;
        pending.push(
          (async () => {
            try {
              const res = await runFn(browser, state);
              // Boolean results are shorthand for Pass/Fail; strings are used verbatim.
              const status = typeof res === "boolean" ? (res ? "Pass" : "Fail") : res;
              return { key, value: status };
            } catch (err: unknown) {
              // Must not throw here: a rejection would fail Promise.all and discard every result.
              return { key, value: `Error: ${describeCheckError(err)}` };
            }
          })()
        );
      } else {
        checklist[key] = inferStatus(checkName, state);
      }
    }
  }

  const results = await Promise.all(pending);
  for (const res of results) {
    checklist[res.key] = res.value;
  }

  return checklist;
}
43+
44+
export function buildQaChecklist(
45+
profile: QaProfile,
46+
state?: BrowserState,
47+
precomputedCheckResults?: Record<string, string>
48+
): Record<string, string> {
549
const checklist: Record<string, string> = {};
650
for (const playbook of playbooksForProfile(profile)) {
751
for (const check of playbook.checks) {
8-
checklist[`${playbook.name}: ${check}`] = inferStatus(check, state);
52+
const checkName = typeof check === "string" ? check : check.name;
53+
const key = `${playbook.name}: ${checkName}`;
54+
if (precomputedCheckResults && key in precomputedCheckResults) {
55+
checklist[key] = precomputedCheckResults[key];
56+
} else {
57+
checklist[key] = inferStatus(checkName, state);
58+
}
959
}
1060
}
1161
return checklist;
Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,16 @@
11
import type { QaProfile } from "../../shared/types.js";
2+
import type { BrowserAgent } from "../../browser/browser-agent.js";
3+
import type { BrowserState } from "../../shared/types.js";
4+
5+
/** What an executable check may report: a status label, or a boolean verdict. */
type PlaybookCheckOutcome = string | boolean;

/** Signature of an executable check; it may answer synchronously or via a promise. */
type PlaybookCheckRunner = (
  browser: BrowserAgent,
  state?: BrowserState
) => PlaybookCheckOutcome | Promise<PlaybookCheckOutcome>;

/**
 * A single playbook entry: a display name plus an optional executable probe.
 */
export interface PlaybookCheck {
  /** Human-readable label of the check. */
  name: string;
  /** Optional probe that evaluates the check against the browser and, when given, its captured state. */
  run?: PlaybookCheckRunner;
}
29

310
export interface QaPlaybook {
411
id: string;
512
name: string;
613
profiles: QaProfile[];
7-
checks: string[];
14+
checks: Array<string | PlaybookCheck>;
815
}
916

agent/src/qa/playbooks/smoke.playbook.ts

Lines changed: 36 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -5,11 +5,42 @@ export const smokePlaybook: QaPlaybook = {
55
name: "Smoke",
66
profiles: ["smoke", "functional", "regression-basic", "full-professional"],
77
checks: [
8-
"Page loads",
9-
"No blocker console errors",
10-
"No blocker network errors",
11-
"Primary navigation visible",
12-
"Screenshot captured"
8+
{
9+
name: "Page loads",
10+
run: async (browser) => {
11+
const url = browser.getUrl();
12+
return url && url !== "about:blank" && !url.startsWith("data:") ? "Pass" : "Fail";
13+
}
14+
},
15+
{
16+
name: "No blocker console errors",
17+
run: async (browser) => {
18+
const errors = browser.getConsoleErrors();
19+
const blockerErrors = errors.filter(e => e.toLowerCase().includes("error") || e.toLowerCase().includes("failed"));
20+
return blockerErrors.length === 0 ? "Pass" : `Fail (${blockerErrors.length} console errors)`;
21+
}
22+
},
23+
{
24+
name: "No blocker network errors",
25+
run: async (browser) => {
26+
const errors = browser.getNetworkErrors();
27+
const blockerErrors = errors.filter(e => e.toLowerCase().includes("failed") || e.toLowerCase().includes("error"));
28+
return blockerErrors.length === 0 ? "Pass" : `Fail (${blockerErrors.length} network errors)`;
29+
}
30+
},
31+
{
32+
name: "Primary navigation visible",
33+
run: async (browser, state) => {
34+
const hasLinks = state ? state.links.length > 0 : (await browser.detectFormFields()).length > 0;
35+
return hasLinks ? "Pass" : "Needs Verification";
36+
}
37+
},
38+
{
39+
name: "Screenshot captured",
40+
run: async (browser, state) => {
41+
return state?.screenshotPath ? "Pass" : "Needs Verification";
42+
}
43+
}
1344
]
1445
};
1546

agent/src/qa/qa-engine.ts

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,14 +18,15 @@ export function runQaEngine(
1818
state: BrowserState,
1919
consoleErrors: string[],
2020
networkErrors: string[],
21-
scope: string[] = []
21+
scope: string[] = [],
22+
precomputedChecklist?: Record<string, string>
2223
): QaEngineResult {
2324
const detected = detectIssues(state, consoleErrors, networkErrors);
2425
const riskTier = riskForScope(scope);
2526
return {
2627
...detected,
2728
checklist: {
28-
...buildQaChecklist(profile, state),
29+
...buildQaChecklist(profile, state, precomputedChecklist),
2930
"Risk tier assigned": riskTier,
3031
"Flaky mitigation rule loaded": flakyRules[0].mitigation
3132
},

0 commit comments

Comments
 (0)