BAKUGOS1
diff --git a/ARCHITECTURE.md b/ARCHITECTURE.md
Lines changed: 12 additions & 2 deletions
diff --git a/README.md b/README.md
Lines changed: 19 additions & 0 deletions
diff --git a/agent/fixtures/example-user.json b/agent/fixtures/example-user.json
Lines changed: 8 additions & 0 deletions
diff --git a/agent/integrations/cypress/README.md b/agent/integrations/cypress/README.md
Lines changed: 58 additions & 0 deletions
diff --git a/agent/scripts/quality-gate.ts b/agent/scripts/quality-gate.ts
Lines changed: 1 addition & 0 deletions
diff --git a/agent/src/api-agent/groq-tool-loop.ts b/agent/src/api-agent/groq-tool-loop.ts
Lines changed: 2 additions & 0 deletions
diff --git a/agent/src/browser/actions.ts b/agent/src/browser/actions.ts
Lines changed: 126 additions & 28 deletions
@@ -74,6 +74,8 @@ agent/src/
 │   ├── actions.ts         # Maps command strings to browser method execution
 │   ├── browser-agent.ts   # Unified browser context, state cache, actions
 │   ├── console-listener.ts
+│   ├── cypress-runtime.ts # Retryable assertions, failure screenshots, command log
+│   ├── fixtures.ts        # Non-sensitive fixture lookup for task steps
 │   ├── login-runner.ts    # Secure credential autofill and validation
 │   ├── network-listener.ts # Collects network errors & intercepts API payloads
 │   ├── page-analyzer.ts   # Computes accessible DOM representation
@@ -156,14 +158,22 @@ To prevent the agent from performing destructive actions in production/staging e
 - **Safe Tool Whitelist**: Tools that only observe or perform standard form interaction (e.g., `open_url`, `click_by_index`, `scroll`, `hover`) bypass filters immediately, preventing false positives.
 - **Intent Pattern Matching**: Unknown or custom tools are analyzed against safety rules (regex check) for action flags before execution. This prevents data fields (like entering `email: "delete-me@gmail.com"`) from triggering message-send blockages.
 
-### 4. Zero-Dependency OOXML Excel Builder (`excel.ts`)
+### 4. Cypress-Inspired Reliability Layer (`cypress-runtime.ts`)
+QaAgent stays Playwright-native but adopts Cypress-style reliability patterns for explicit task steps:
+- Query/assertion steps retry until a timeout and re-check the current DOM each attempt.
+- Mutating actions are recorded as single-shot commands while Playwright handles actionability waits.
+- Failed commands can capture a failure screenshot.
+- Every explicit task command is written to a structured Command Log with status, attempts, duration, error, and screenshot path.
+- Fixture references load reusable non-sensitive values from `agent/fixtures`.
+
+### 5. Zero-Dependency OOXML Excel Builder (`excel.ts`)
 To remain lightweight and portable, the Excel report generator uses **no external libraries** like `exceljs` or `xlsx`. It compiles raw OpenXML files directly:
 - Writes structure files: `[Content_Types].xml`, `xl/styles.xml`, `xl/workbook.xml`, `xl/worksheets/sheet1.xml`, etc.
 - Serializes screenshots into PNG files under `xl/media/` and writes `drawing.xml` elements to position screenshots inside cells.
 - Standardizes styling: formats headers (purple background, bold white text), severity tiers (Red/Critical, Amber/High, Yellow/Medium, Blue/Low), and column widths.
 - Bundles them using a lightweight, pure Node.js CRC32-based ZIP compiler.
 
-### 5. Autonomous Explorer (`autonomous-explorer.ts`)
+### 6. Autonomous Explorer (`autonomous-explorer.ts`)
 In Codex/no-API mode, the agent isn't passive. It crawls and checks sites dynamically:
 - Locates navbar, sidebar, and tab navigation links.
 - Explores linked pages (restricted to the same origin URL).
 
@@ -27,6 +27,7 @@ QaAgent runs a local, highly-instrumented Playwright browser, captures trace evi
 * **Autonomous Crawling & Testing**: In Codex/no-API mode, the agent automatically discovers links, sidebar items, tabs, and modals within the same origin, tests form validation, and takes full-page screenshots at every step.
 * **Dual Execution Modes**: Choose **Codex/no-API mode** (ideal for local-first execution with local credentials) or **Groq API mode** (autonomous agent CLI loop utilizing model-driven tool calls).
 * **Multi-Strategy Selector Healing**: Automatically attempts to recover from failing CSS selectors using selector-history memory, text hints, ARIA roles, or indexed state coordinates before raising a failure.
+* **Cypress-Inspired Reliability**: Retryable assertions, fixture-backed task values, failure screenshots, and a Command Log sheet make dynamic UI runs easier to debug without adding Cypress as a runtime dependency.
 * **Two-Tier Safety Guard**: A proactive firewall blocking destructive actions (deletes, settings alterations, payments, bulk updates, and message broadcast sends) by default. Safe tools bypass checks to eliminate false positives.
 * **Fleshed-out QA Detectors**: Automated DOM audits checking for accessibility faults, invalid forms, pagination/horizontal scrolling failures in tables, and console/network bottlenecks.
 * **Misleading UI Detection**: An API response interceptor capturing HTTP payloads to confirm if a user-facing success toast matches the actual server API response.
@@ -54,6 +55,24 @@ Run with a task file:
 npm run agent:codex -- --task-file agent/tasks/example-task.json --headed
 ```
 
+Cypress-style task assertions:
+```json
+{
+  "cypress": {
+    "defaultCommandTimeoutMs": 5000,
+    "pollIntervalMs": 100,
+    "screenshotOnFailure": true
+  },
+  "steps": [
+    { "action": "assert_visible", "selector": "h1" },
+    { "action": "assert_text", "expected": "Example Domain" },
+    { "action": "assert_url_includes", "expected": "example.com" }
+  ]
+}
+```
+
+More details: [agent/integrations/cypress/README.md](agent/integrations/cypress/README.md).
+
 ---
 
 ## 🏛️ Architecture & System Design
 
@@ -0,0 +1,8 @@
+{
+  "name": "Qa Test User",
+  "email": "qa.user@example.com",
+  "phone": "+91 90000 00000",
+  "company": "QaAgent Demo Co",
+  "city": "Bengaluru",
+  "role": "QA Tester"
+}
@@ -0,0 +1,58 @@
+# Cypress-Inspired Reliability Layer
+
+QaAgent remains TypeScript + Playwright. It does not add Cypress as a runtime dependency. This integration adopts the Cypress ideas that fit a local-first QA agent:
+
+- Retry query/assertion steps until a timeout, instead of relying on fixed sleeps.
+- Keep mutating actions single-shot while still letting Playwright wait for actionability.
+- Capture a failure screenshot when an assertion or command fails.
+- Record every explicit task command in a structured Command Log with status, attempts, duration, error, and failure screenshot path.
+- Support fixture references for reusable non-sensitive test values.
+- Keep each task run self-contained so that its steps never depend on browser state left over from a previous run.
+
+## Task Config
+
+```json
+{
+  "cypress": {
+    "defaultCommandTimeoutMs": 5000,
+    "pollIntervalMs": 100,
+    "screenshotOnFailure": true,
+    "fixtureDir": "agent/fixtures"
+  }
+}
+```
+
+## Assertion Steps
+
+```json
+[
+  { "action": "assert_visible", "selector": "h1" },
+  { "action": "assert_text", "selector": "main", "expected": "Dashboard" },
+  { "action": "assert_url_includes", "expected": "/dashboard" },
+  { "action": "assert_count", "selector": "table tbody tr", "count": 10 }
+]
+```
+
+Assertions re-query the page until they pass or hit `defaultCommandTimeoutMs`. Use `timeoutMs` on a step to override the task default.
+
+## Fixtures
+
+Fixtures live in `agent/fixtures` by default and must contain fake or non-sensitive values only.
+
+```json
+{ "action": "fill_by_label", "text": "Email", "fixture": "example-user.email" }
+```
+
+The fixture reference above loads `agent/fixtures/example-user.json` and reads the `email` key. Passwords, tokens, cookies, customer records, and payment data must not be stored in fixtures.
+
+## Reports
+
+Markdown and Excel reports include a `Cypress-Style Command Log`. This makes flaky UI failures easier to debug because each command shows:
+
+- pass/fail status
+- command kind
+- command target
+- retry attempts
+- duration
+- error text
+- failure screenshot path
@@ -33,6 +33,7 @@ function run(name: string, command: string, args: string[]): void {
 function secretScan(): void {
   const roots = [
     "agent/src",
+    "agent/fixtures",
     "agent/scripts",
     "agent/tests",
     "agent/tasks",
 
@@ -145,6 +145,7 @@ export async function runGroqToolLoop(task: QaTask, headed: boolean, maxSteps: n
       tracePath,
       browserState: state,
       coverage,
+      commandLog: browser.recorder.commandLog(),
       qaChecklist: detected.checklist,
       memoryNotes: [
         `QA profile: ${task.qaProfile}`,
@@ -190,6 +191,7 @@ export async function runGroqToolLoop(task: QaTask, headed: boolean, maxSteps: n
       screenshots,
       tracePath,
       coverage,
+      commandLog: browser.recorder.commandLog(),
       qaChecklist: {},
       memoryNotes: [...coverage.notes, ...(tracePath ? [`Trace: ${tracePath}`] : [])],
       loginResult,
 
@@ -1,50 +1,148 @@
 import type { BrowserAgent } from "./browser-agent.js";
-import type { TaskStep } from "../shared/types.js";
+import { resolveStepValue } from "./fixtures.js";
+import { oneAttempt, retryAssertion, runCypressCommand } from "./cypress-runtime.js";
+import type { QaTask, TaskStep } from "../shared/types.js";
 
-export async function runTaskStep(browser: BrowserAgent, step: TaskStep): Promise<string | undefined> {
+export async function runTaskStep(browser: BrowserAgent, step: TaskStep, task?: QaTask): Promise<string | undefined> { // Runs one task step: dispatches on step.action and wraps each browser call in runCypressCommand, returning whatever that call resolves to.
   switch (step.action) {
     case "open":
-      await browser.openUrl(step.url || "");
-      return undefined;
+      return runCypressCommand(browser, task, step, { kind: "action", name: "open", target: required(step.url, "url") }, async () => { // required() in the metadata is evaluated before runCypressCommand is invoked, so a missing url throws at this point.
+        await browser.openUrl(required(step.url, "url"));
+        return oneAttempt(undefined); // Action steps hand their result to oneAttempt(); its exact shape is defined in cypress-runtime.ts (not visible here).
+      });
     case "click":
-      await browser.click(required(step.selector, "selector"));
-      return undefined;
+      return runCypressCommand(browser, task, step, { kind: "action", name: "click" }, async () => { // No explicit target is passed for click; NOTE(review): confirm runCypressCommand derives one from the step.
+        await browser.click(required(step.selector, "selector"));
+        return oneAttempt(undefined);
+      });
     case "click_by_index":
-      await browser.clickByIndex(requiredNumber(step.index, "index"));
-      return undefined;
+      return runCypressCommand(browser, task, step, { kind: "action", name: "click_by_index", target: String(requiredNumber(step.index, "index")) }, async () => {
+        await browser.clickByIndex(requiredNumber(step.index, "index"));
+        return oneAttempt(undefined);
+      });
     case "click_by_text":
-      await browser.clickByText(required(step.text || step.value, "text"));
-      return undefined;
+      return runCypressCommand(browser, task, step, { kind: "action", name: "click_by_text", target: textValue(step, task, "text") }, async () => {
+        await browser.clickByText(textValue(step, task, "text"));
+        return oneAttempt(undefined);
+      });
     case "click_by_role":
-      await browser.clickByRole(required(step.role, "role"), step.text || step.value);
-      return undefined;
+      return runCypressCommand(browser, task, step, { kind: "action", name: "click_by_role", target: roleTarget(step, task) }, async () => {
+        await browser.clickByRole(required(step.role, "role"), optionalTextValue(step, task));
+        return oneAttempt(undefined);
+      });
     case "fill":
-      await browser.fill(required(step.selector, "selector"), step.value || "");
-      return undefined;
+      return runCypressCommand(browser, task, step, { kind: "action", name: "fill" }, async () => { // The value to type comes from step.value or, failing that, the step's fixture reference.
+        await browser.fill(required(step.selector, "selector"), valueFromStep(step, task));
+        return oneAttempt(undefined);
+      });
     case "fill_by_label":
-      await browser.fillByLabel(required(step.text || step.selector, "label"), step.value || "");
-      return undefined;
+      return runCypressCommand(browser, task, step, { kind: "action", name: "fill_by_label", target: required(step.text || step.selector, "label") }, async () => { // The label comes from step.text, falling back to step.selector.
+        await browser.fillByLabel(required(step.text || step.selector, "label"), valueFromStep(step, task));
+        return oneAttempt(undefined);
+      });
     case "fill_by_placeholder":
-      await browser.fillByPlaceholder(required(step.text || step.selector, "placeholder"), step.value || "");
-      return undefined;
+      return runCypressCommand(browser, task, step, { kind: "action", name: "fill_by_placeholder", target: required(step.text || step.selector, "placeholder") }, async () => {
+        await browser.fillByPlaceholder(required(step.text || step.selector, "placeholder"), valueFromStep(step, task));
+        return oneAttempt(undefined);
+      });
     case "fill_by_name":
-      await browser.fillByName(required(step.text || step.selector, "name"), step.value || "");
-      return undefined;
+      return runCypressCommand(browser, task, step, { kind: "action", name: "fill_by_name", target: required(step.text || step.selector, "name") }, async () => {
+        await browser.fillByName(required(step.text || step.selector, "name"), valueFromStep(step, task));
+        return oneAttempt(undefined);
+      });
     case "press":
-      await browser.press(required(step.selector, "selector"), step.key || "Enter");
-      return undefined;
+      return runCypressCommand(browser, task, step, { kind: "action", name: "press", target: required(step.selector, "selector") }, async () => {
+        await browser.press(required(step.selector, "selector"), step.key || "Enter"); // Defaults to the "Enter" key when step.key is not set.
+        return oneAttempt(undefined);
+      });
     case "wait":
-      if (step.selector) await browser.waitForSelector(step.selector);
-      else await browser.wait(Number(step.value || 1000));
-      return undefined;
+      return runCypressCommand(browser, task, step, { kind: step.selector ? "query" : "system", name: "wait" }, async () => { // Logged as "query" when waiting on a selector, "system" for a fixed-duration wait.
+        if (step.selector) await browser.waitForSelector(step.selector);
+        else await browser.wait(Number(step.value || 1000)); // Falls back to 1000 when step.value is empty; presumably milliseconds — confirm against browser.wait.
+        return oneAttempt(undefined);
+      });
     case "screenshot":
-      return browser.screenshot(step.label || "task");
+      return runCypressCommand(browser, task, step, { kind: "system", name: "screenshot", target: step.label || "task" }, async () => // The screenshot() result is the only action result that is passed through oneAttempt() instead of undefined.
+        oneAttempt(await browser.screenshot(step.label || "task"))
+      );
     case "analyze":
-      await browser.saveBrowserState();
-      return undefined;
+      return runCypressCommand(browser, task, step, { kind: "system", name: "analyze", target: step.label || "browser-state" }, async () => {
+        await browser.saveBrowserState();
+        return oneAttempt(undefined);
+      });
+    case "assert_visible":
+      return runCypressCommand(browser, task, step, { kind: "assertion", name: "assert_visible" }, () => // Assertion steps delegate to retryAssertion; the retry policy itself lives in cypress-runtime.ts (not visible here).
+        retryAssertion(browser, task, step, async () => { // The callback signals failure by throwing and success by returning undefined.
+          const visible = await isStepTargetVisible(browser, step, task);
+          if (!visible) throw new Error(`Expected target to be visible: ${step.selector || step.text || step.role || step.value || step.label || "unknown"}`);
+          return undefined;
+        })
+      );
+    case "assert_text":
+      return runCypressCommand(browser, task, step, { kind: "assertion", name: "assert_text", target: expectedValue(step, task) }, () =>
+        retryAssertion(browser, task, step, async () => {
+          const expected = expectedValue(step, task);
+          const actual = step.selector
+            ? await browser.activePage.locator(step.selector).first().innerText({ timeout: 750 }).catch(() => "") // innerText failures (including the 750 timeout) become "", so the includes() check below fails instead.
+            : await browser.activePage.locator("body").innerText({ timeout: 750 }).catch(() => ""); // Without a selector the whole body text is searched.
+          if (!actual.includes(expected)) throw new Error(`Expected page text to include "${expected}".`); // Substring match, case-sensitive.
+          return undefined;
+        })
+      );
+    case "assert_url_includes":
+      return runCypressCommand(browser, task, step, { kind: "assertion", name: "assert_url_includes", target: expectedValue(step, task) }, () =>
+        retryAssertion(browser, task, step, async () => {
+          const expected = expectedValue(step, task);
+          const currentUrl = browser.getUrl();
+          if (!currentUrl.includes(expected)) throw new Error(`Expected URL "${currentUrl}" to include "${expected}".`);
+          return undefined;
+        })
+      );
+    case "assert_count":
+      return runCypressCommand(browser, task, step, { kind: "assertion", name: "assert_count", target: required(step.selector, "selector") }, () =>
+        retryAssertion(browser, task, step, async () => {
+          const expectedCount = requiredNumber(step.count, "count");
+          const actualCount = await browser.activePage.locator(required(step.selector, "selector")).count();
+          if (actualCount !== expectedCount) throw new Error(`Expected ${expectedCount} element(s), found ${actualCount}.`); // Requires an exact match on the number of elements found.
+          return undefined;
+        })
+      );
   }
 }
 
+async function isStepTargetVisible(browser: BrowserAgent, step: TaskStep, task?: QaTask): Promise<boolean> { // Reports whether the step's target is currently visible; locator precedence is selector, then role, then text.
+  if (step.selector) return browser.activePage.locator(step.selector).first().isVisible().catch(() => false); // Only the first match is checked; a failed isVisible() call is reported as "not visible" rather than thrown.
+  if (step.role) return browser.activePage.getByRole(step.role as never, optionalTextValue(step, task) ? { name: optionalTextValue(step, task) } : undefined).first().isVisible().catch(() => false); // The name filter is passed only when a text/value/fixture name resolves.
+  return browser.activePage.getByText(textValue(step, task, "text"), { exact: false }).first().isVisible().catch(() => false); // textValue throws when no text, value, or fixture resolves; that error is raised before the .catch() and so is not swallowed.
+}
+
+function valueFromStep(step: TaskStep, task?: QaTask): string { // Value to enter for fill-style steps.
+  return resolveStepValue(step.value || fixtureRef(step.fixture), task); // A truthy step.value wins; otherwise the step's fixture reference (possibly undefined) is handed to resolveStepValue.
+}
+
+function expectedValue(step: TaskStep, task?: QaTask): string { // Expected string for assertion steps; throws "Missing expected for task step." when the resolved value is falsy.
+  return required(resolveStepValue(step.expected || step.text || step.value || step.url || fixtureRef(step.fixture), task), "expected"); // The first truthy of expected, text, value, url, then the fixture reference is used.
+}
+
+function textValue(step: TaskStep, task: QaTask | undefined, name: string): string { // Required text for a step; `name` only labels the "Missing <name> for task step." error.
+  return required(resolveStepValue(step.text || step.value || fixtureRef(step.fixture), task), name); // The first truthy of text, value, then the fixture reference is used.
+}
+
+function optionalTextValue(step: TaskStep, task?: QaTask): string | undefined { // Optional text for a step: same text/value/fixture precedence as textValue, but never throws for a missing value.
+  const value = resolveStepValue(step.text || step.value || fixtureRef(step.fixture), task);
+  return value || undefined; // Any falsy result (e.g. "") is normalized to undefined.
+}
+
+function roleTarget(step: TaskStep, task?: QaTask): string { // Builds the command target for role-based steps: "<role>" or "<role>:<name>".
+  const name = optionalTextValue(step, task);
+  return `${required(step.role, "role")}${name ? `:${name}` : ""}`; // Throws "Missing role for task step." when step.role is not set.
+}
+
+function fixtureRef(fixture: string | undefined): string | undefined { // Normalizes a fixture name to its "fixture:"-prefixed reference form.
+  if (!fixture) return undefined; // An undefined or empty fixture yields no reference.
+  return fixture.startsWith("fixture:") ? fixture : `fixture:${fixture}`; // References that already carry the prefix are returned unchanged.
+}
+
 function required(value: string | undefined, name: string): string {
   if (!value) throw new Error(`Missing ${name} for task step.`);
   return value;