From e1d64eec1d8234be6976c37dc796a64f96e755cc Mon Sep 17 00:00:00 2001 From: Test User Date: Tue, 26 May 2026 17:39:57 -0700 Subject: [PATCH 1/5] =?UTF-8?q?feat(prd):=20stress-test=20web=20UI=20?= =?UTF-8?q?=E2=80=94=20trigger=20+=20streaming=20progress=20(#561)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a Stress Test button to the PRD page that runs the existing recursive-decomposition stress test over SSE and streams progress into a modal. Backend: - core: stress_test_prd_stream() async generator wraps the headless stress_test_prd pipeline, yielding goals_extracted / goal_analyzed / complete / error events (provider.complete offloaded via asyncio.to_thread) - GET /api/v2/prd/stress-test SSE endpoint streams those events; missing PRD / missing ANTHROPIC_API_KEY surface as in-stream error events. GET (not POST) for browser EventSource compatibility, matching the existing task stream endpoint. Frontend: - useStressTestStream hook (built on useEventSource, direct SSE connect) parses events into an idle->streaming->complete|error state machine - StressTestModal renders streaming lines, completion summary, and a graceful error state with Retry - Stress Test button wired through PRDHeader -> PRDView -> /prd page, enabled only when a PRD exists Results rendering / answer flow is out of scope (tracked in #562); the hook retains tech_spec_markdown + ambiguity_report for that work. 
Closes #561 --- codeframe/core/prd_stress_test.py | 62 +++++- codeframe/ui/routers/prd_v2.py | 74 +++++++- tests/core/test_prd_stress_test.py | 78 ++++++++ tests/ui/test_prd_stress_test_router.py | 158 ++++++++++++++++ web-ui/__mocks__/@hugeicons/react.js | 1 + .../components/prd/PRDHeader.test.tsx | 59 ++++++ .../__tests__/components/prd/PrdPage.test.tsx | 37 ++++ .../components/prd/StressTestModal.test.tsx | 130 +++++++++++++ .../hooks/useStressTestStream.test.ts | 162 ++++++++++++++++ web-ui/src/app/prd/page.tsx | 9 + web-ui/src/components/prd/PRDHeader.tsx | 14 ++ web-ui/src/components/prd/PRDView.tsx | 3 + web-ui/src/components/prd/StressTestModal.tsx | 135 +++++++++++++ web-ui/src/components/prd/index.ts | 1 + web-ui/src/hooks/index.ts | 12 ++ web-ui/src/hooks/useStressTestStream.ts | 178 ++++++++++++++++++ 16 files changed, 1111 insertions(+), 2 deletions(-) create mode 100644 tests/ui/test_prd_stress_test_router.py create mode 100644 web-ui/src/__tests__/components/prd/PRDHeader.test.tsx create mode 100644 web-ui/src/__tests__/components/prd/StressTestModal.test.tsx create mode 100644 web-ui/src/__tests__/hooks/useStressTestStream.test.ts create mode 100644 web-ui/src/components/prd/StressTestModal.tsx create mode 100644 web-ui/src/hooks/useStressTestStream.ts diff --git a/codeframe/core/prd_stress_test.py b/codeframe/core/prd_stress_test.py index d51810f3..461280f0 100644 --- a/codeframe/core/prd_stress_test.py +++ b/codeframe/core/prd_stress_test.py @@ -8,12 +8,13 @@ This module is headless — no FastAPI or HTTP dependencies. 
""" +import asyncio import json import logging import uuid from dataclasses import dataclass from enum import Enum -from typing import Optional +from typing import AsyncGenerator, Optional from codeframe.adapters.llm.base import Purpose @@ -407,3 +408,62 @@ def stress_test_prd( tech_spec_markdown=tech_spec, ambiguity_report=amb_report, ) + + +async def stress_test_prd_stream( + prd_content: str, provider, max_depth: int = 3 +) -> AsyncGenerator[dict, None]: + """Async streaming variant of :func:`stress_test_prd`. + + Yields progress event dicts suitable for SSE delivery as each top-level + goal is decomposed, so a UI can render incremental output: + + - ``{"type": "goals_extracted", "goals": [...]}`` + - ``{"type": "goal_analyzed", "goal": str, "classification": str, + "ambiguities_so_far": int}`` (once per top-level goal) + - ``{"type": "complete", "ambiguity_count": int, + "tech_spec_markdown": str, "ambiguity_report": str}`` + - ``{"type": "error", "message": str}`` if decomposition raises + + The underlying ``provider.complete()`` calls are synchronous and blocking, + so each is offloaded via :func:`asyncio.to_thread` to keep the event loop + responsive. This function stays headless (no FastAPI/HTTP imports). 
+ """ + try: + goals = await asyncio.to_thread(extract_goals, prd_content, provider) + yield {"type": "goals_extracted", "goals": goals} + + ambiguities: list[Ambiguity] = [] + tree: list[DecompositionNode] = [] + + for goal in goals: + node = await asyncio.to_thread( + recursive_decompose, + goal, # title + goal, # description + [], # lineage + prd_content, + 0, # depth + max_depth, + ambiguities, + provider, + ) + tree.append(node) + yield { + "type": "goal_analyzed", + "goal": node.title, + "classification": node.classification.value, + "ambiguities_so_far": len(ambiguities), + } + + tech_spec = render_tech_spec(tree, ambiguities) + amb_report = render_ambiguity_report(ambiguities) + yield { + "type": "complete", + "ambiguity_count": len(ambiguities), + "tech_spec_markdown": tech_spec, + "ambiguity_report": amb_report, + } + except Exception as exc: # noqa: BLE001 — surface any failure to the client + logger.warning("Stress test stream failed: %s", exc, exc_info=True) + yield {"type": "error", "message": str(exc)} diff --git a/codeframe/ui/routers/prd_v2.py b/codeframe/ui/routers/prd_v2.py index b9d1165a..5c2bd8a8 100644 --- a/codeframe/ui/routers/prd_v2.py +++ b/codeframe/ui/routers/prd_v2.py @@ -14,10 +14,13 @@ GET /api/v2/prd/{id}/diff - Diff two versions """ +import json import logging -from typing import Optional +import os +from typing import AsyncGenerator, Optional from fastapi import APIRouter, Depends, HTTPException, Query, Request +from fastapi.responses import StreamingResponse from pydantic import BaseModel, Field from codeframe.core.workspace import Workspace @@ -186,6 +189,75 @@ async def get_latest_prd( return _prd_to_response(record) +@router.get("/stress-test") +async def stress_test_prd_stream_endpoint( + request: Request, + max_depth: int = Query(3, ge=1, le=10, description="Maximum recursion depth"), + workspace: Workspace = Depends(get_v2_workspace), +) -> StreamingResponse: + """Stream a PRD stress-test (recursive decomposition) via SSE. 
+ + Runs the headless ``stress_test_prd_stream`` core generator over the + latest PRD and emits its progress events as Server-Sent Events. This is + the web equivalent of ``cf prd stress-test``. + + Declared as GET (not POST) so it is reachable from a browser + ``EventSource``, matching ``GET /api/v2/tasks/{task_id}/stream``. No custom + auth headers are required (cookie-based auth via ``withCredentials``). + + Event payloads (JSON in the SSE ``data:`` field, ``type`` field): + - ``goals_extracted``: high-level goals parsed from the PRD + - ``goal_analyzed``: one per top-level goal (classification + running + ambiguity count) + - ``complete``: ambiguity count + rendered tech spec / ambiguity report + - ``error``: no PRD, missing API key, or decomposition failure + + Recoverable problems (missing PRD, missing ``ANTHROPIC_API_KEY``) are + surfaced as in-stream ``error`` events rather than HTTP errors, so the + browser ``EventSource`` can display them via its message handler. + """ + from codeframe.core.prd_stress_test import stress_test_prd_stream + + def _sse(event: dict) -> str: + return f"data: {json.dumps(event)}\n\n" + + async def _generate() -> AsyncGenerator[str, None]: + record = prd.get_latest(workspace) + if not record: + yield _sse({ + "type": "error", + "message": "No PRD found. 
Add or generate a PRD first.", + }) + return + + api_key = os.getenv("ANTHROPIC_API_KEY") + if not api_key: + yield _sse({ + "type": "error", + "message": "ANTHROPIC_API_KEY environment variable required.", + }) + return + + from codeframe.adapters.llm.anthropic import AnthropicProvider + + provider = AnthropicProvider(api_key=api_key) + + async for event in stress_test_prd_stream( + record.content, provider, max_depth=max_depth, + ): + yield _sse(event) + + return StreamingResponse( + _generate(), + media_type="text/event-stream", + headers={ + "Cache-Control": "no-cache", + "Connection": "keep-alive", + "X-Accel-Buffering": "no", + }, + ) + + @router.get("/{prd_id}", response_model=PrdResponse) @rate_limit_standard() async def get_prd( diff --git a/tests/core/test_prd_stress_test.py b/tests/core/test_prd_stress_test.py index 225d4be9..9b49b0a1 100644 --- a/tests/core/test_prd_stress_test.py +++ b/tests/core/test_prd_stress_test.py @@ -367,6 +367,84 @@ def test_max_depth_respected(self, sample_prd, mock_provider): assert child.children == [] # No grandchildren at depth 1 +# --- Streaming Generator Tests --- + + +class TestStressTestPrdStream: + async def test_emits_event_sequence(self, sample_prd, mock_provider): + from codeframe.core.prd_stress_test import stress_test_prd_stream + + events = [ + ev async for ev in stress_test_prd_stream( + sample_prd, mock_provider, max_depth=3, + ) + ] + + types = [e["type"] for e in events] + # First event announces extracted goals, last announces completion. + assert types[0] == "goals_extracted" + assert types[-1] == "complete" + # One goal_analyzed per top-level goal (3 in the sample PRD). 
+ assert types.count("goal_analyzed") == 3 + + async def test_goals_extracted_payload(self, sample_prd, mock_provider): + from codeframe.core.prd_stress_test import stress_test_prd_stream + + events = [ + ev async for ev in stress_test_prd_stream(sample_prd, mock_provider) + ] + goals_event = events[0] + assert goals_event["goals"] == [ + "User Authentication", + "Invoice Management", + "PDF Export", + ] + + async def test_goal_analyzed_carries_classification_and_running_count( + self, sample_prd, mock_provider + ): + from codeframe.core.prd_stress_test import stress_test_prd_stream + + events = [ + ev async for ev in stress_test_prd_stream(sample_prd, mock_provider) + ] + analyzed = [e for e in events if e["type"] == "goal_analyzed"] + + auth = next(e for e in analyzed if e["goal"] == "User Authentication") + assert auth["classification"] == "ambiguous" + assert auth["ambiguities_so_far"] == 1 + + invoice = next(e for e in analyzed if e["goal"] == "Invoice Management") + assert invoice["classification"] == "composite" + + pdf = next(e for e in analyzed if e["goal"] == "PDF Export") + assert pdf["classification"] == "atomic" + + async def test_complete_payload(self, sample_prd, mock_provider): + from codeframe.core.prd_stress_test import stress_test_prd_stream + + events = [ + ev async for ev in stress_test_prd_stream(sample_prd, mock_provider) + ] + complete = events[-1] + assert complete["type"] == "complete" + assert complete["ambiguity_count"] == 1 + assert "# Technical Specification" in complete["tech_spec_markdown"] + assert "AUTH SCOPE" in complete["ambiguity_report"] + + async def test_provider_failure_yields_error_event(self, sample_prd): + from codeframe.core.prd_stress_test import stress_test_prd_stream + + failing = MagicMock() + failing.complete.side_effect = RuntimeError("LLM unavailable") + + events = [ + ev async for ev in stress_test_prd_stream(sample_prd, failing) + ] + assert events[-1]["type"] == "error" + assert "LLM unavailable" in 
events[-1]["message"] + + # --- CLI Tests --- diff --git a/tests/ui/test_prd_stress_test_router.py b/tests/ui/test_prd_stress_test_router.py new file mode 100644 index 00000000..a98041b8 --- /dev/null +++ b/tests/ui/test_prd_stress_test_router.py @@ -0,0 +1,158 @@ +"""Tests for the PRD stress-test SSE endpoint (issue #561). + +Covers GET /api/v2/prd/stress-test: +- Streams the core stress_test_prd_stream events as SSE +- Emits an in-stream error event when no PRD exists +- Emits an in-stream error event when ANTHROPIC_API_KEY is missing + +The endpoint is GET (not POST) so it is reachable from a browser EventSource, +matching the existing GET /api/v2/tasks/{task_id}/stream pattern. +""" + +import json +import shutil +import tempfile +from pathlib import Path +from unittest.mock import MagicMock, patch + +import pytest +from fastapi import FastAPI +from fastapi.testclient import TestClient + +from codeframe.core import prd as prd_module +from codeframe.core.workspace import create_or_load_workspace + +pytestmark = pytest.mark.v2 + + +SAMPLE_PRD = """# Invoice SaaS + +## Core Features +1. User Authentication - users can register and log in +2. Invoice Management - CRUD operations for invoices +3. 
PDF Export - generate PDF invoices +""" + + +@pytest.fixture +def test_workspace(): + temp_dir = Path(tempfile.mkdtemp()) + workspace_path = temp_dir / "test_workspace" + workspace_path.mkdir(parents=True, exist_ok=True) + workspace = create_or_load_workspace(workspace_path) + yield workspace + shutil.rmtree(temp_dir, ignore_errors=True) + + +@pytest.fixture +def test_client(test_workspace): + from codeframe.ui.dependencies import get_v2_workspace + from codeframe.ui.routers import prd_v2 + + app = FastAPI() + app.include_router(prd_v2.router) + + def get_test_workspace(): + return test_workspace + + app.dependency_overrides[get_v2_workspace] = get_test_workspace + + client = TestClient(app) + client.workspace = test_workspace + return client + + +@pytest.fixture +def mock_provider(): + """Mock LLM provider returning predictable decomposition responses.""" + mock = MagicMock() + + def complete_side_effect(messages, purpose=None, system=None, **kwargs): + content = messages[0]["content"] if messages else "" + response = MagicMock() + if "high-level deliverable goals" in (system or "").lower(): + response.content = json.dumps( + ["User Authentication", "Invoice Management", "PDF Export"] + ) + elif "classify" in (system or "").lower(): + goal_line = "" + for line in content.splitlines(): + if line.startswith("Goal: "): + goal_line = line[6:].strip() + break + if "Authentication" in goal_line: + response.content = json.dumps({ + "classification": "ambiguous", + "ambiguity_label": "AUTH SCOPE", + "questions": ["Email/password or OAuth?"], + "recommendation": "Add auth section", + "complexity_hint": "Medium", + }) + else: + response.content = json.dumps({ + "classification": "atomic", + "complexity_hint": "Low", + }) + else: + response.content = json.dumps( + {"classification": "atomic", "complexity_hint": "Low"} + ) + return response + + mock.complete.side_effect = complete_side_effect + return mock + + +def _parse_sse(text: str) -> list[dict]: + """Extract JSON 
payloads from SSE `data:` lines (ignoring heartbeats).""" + events = [] + for line in text.splitlines(): + if line.startswith("data:"): + payload = line[len("data:"):].strip() + if payload: + events.append(json.loads(payload)) + return events + + +class TestStressTestEndpoint: + @patch("codeframe.adapters.llm.anthropic.AnthropicProvider") + def test_streams_event_sequence( + self, mock_provider_cls, test_client, mock_provider, monkeypatch + ): + monkeypatch.setenv("ANTHROPIC_API_KEY", "sk-ant-test-fake") + mock_provider_cls.return_value = mock_provider + prd_module.store(test_client.workspace, SAMPLE_PRD, "Invoice SaaS", {}) + + response = test_client.get("/api/v2/prd/stress-test") + assert response.status_code == 200 + assert "text/event-stream" in response.headers["content-type"] + + events = _parse_sse(response.text) + types = [e["type"] for e in events] + assert types[0] == "goals_extracted" + assert types[-1] == "complete" + assert types.count("goal_analyzed") == 3 + assert events[-1]["ambiguity_count"] == 1 + + @patch("codeframe.adapters.llm.anthropic.AnthropicProvider") + def test_no_prd_emits_error_event( + self, mock_provider_cls, test_client, mock_provider, monkeypatch + ): + monkeypatch.setenv("ANTHROPIC_API_KEY", "sk-ant-test-fake") + mock_provider_cls.return_value = mock_provider + + response = test_client.get("/api/v2/prd/stress-test") + assert response.status_code == 200 + events = _parse_sse(response.text) + assert events[-1]["type"] == "error" + assert "prd" in events[-1]["message"].lower() + + def test_missing_api_key_emits_error_event(self, test_client, monkeypatch): + monkeypatch.delenv("ANTHROPIC_API_KEY", raising=False) + prd_module.store(test_client.workspace, SAMPLE_PRD, "Invoice SaaS", {}) + + response = test_client.get("/api/v2/prd/stress-test") + assert response.status_code == 200 + events = _parse_sse(response.text) + assert events[-1]["type"] == "error" + assert "ANTHROPIC_API_KEY" in events[-1]["message"] diff --git 
a/web-ui/__mocks__/@hugeicons/react.js b/web-ui/__mocks__/@hugeicons/react.js index 34f1a5d0..f77cb934 100644 --- a/web-ui/__mocks__/@hugeicons/react.js +++ b/web-ui/__mocks__/@hugeicons/react.js @@ -33,6 +33,7 @@ module.exports = { Upload04Icon: createIconMock('Upload04Icon'), MessageSearch01Icon: createIconMock('MessageSearch01Icon'), TaskEdit01Icon: createIconMock('TaskEdit01Icon'), + TestTube01Icon: createIconMock('TestTube01Icon'), ArtificialIntelligence01Icon: createIconMock('ArtificialIntelligence01Icon'), SentIcon: createIconMock('SentIcon'), // AppSidebar diff --git a/web-ui/src/__tests__/components/prd/PRDHeader.test.tsx b/web-ui/src/__tests__/components/prd/PRDHeader.test.tsx new file mode 100644 index 00000000..49398670 --- /dev/null +++ b/web-ui/src/__tests__/components/prd/PRDHeader.test.tsx @@ -0,0 +1,59 @@ +import { render, screen } from '@testing-library/react'; +import userEvent from '@testing-library/user-event'; +import { PRDHeader } from '@/components/prd/PRDHeader'; +import type { PrdResponse } from '@/types'; + +const fakePrd: PrdResponse = { + id: 'prd-1', + workspace_id: 'ws-1', + title: 'My PRD', + content: '# Overview', + metadata: {}, + created_at: '2026-01-01T00:00:00Z', + version: 1, + parent_id: null, + change_summary: null, + chain_id: 'chain-1', +}; + +const noop = () => {}; + +function renderHeader(overrides: Partial> = {}) { + return render( + + ); +} + +describe('PRDHeader — Stress Test button', () => { + it('is not rendered when onStressTest is not provided', () => { + renderHeader(); + expect( + screen.queryByRole('button', { name: /stress test/i }) + ).not.toBeInTheDocument(); + }); + + it('is visible and enabled when a PRD exists', () => { + renderHeader({ onStressTest: noop }); + const button = screen.getByRole('button', { name: /stress test/i }); + expect(button).toBeInTheDocument(); + expect(button).toBeEnabled(); + }); + + it('is disabled when no PRD exists', () => { + renderHeader({ prd: null, onStressTest: noop }); + 
expect(screen.getByRole('button', { name: /stress test/i })).toBeDisabled(); + }); + + it('calls onStressTest when clicked', async () => { + const onStressTest = jest.fn(); + renderHeader({ onStressTest }); + await userEvent.click(screen.getByRole('button', { name: /stress test/i })); + expect(onStressTest).toHaveBeenCalledTimes(1); + }); +}); diff --git a/web-ui/src/__tests__/components/prd/PrdPage.test.tsx b/web-ui/src/__tests__/components/prd/PrdPage.test.tsx index e254f7fb..68d52a9e 100644 --- a/web-ui/src/__tests__/components/prd/PrdPage.test.tsx +++ b/web-ui/src/__tests__/components/prd/PrdPage.test.tsx @@ -32,14 +32,17 @@ jest.mock('@/components/prd', () => ({ PRDView: ({ onGenerateTasks, isGeneratingTasks, + onStressTest, }: { onGenerateTasks: () => void; isGeneratingTasks: boolean; + onStressTest?: () => void; }) => (
+
), })); @@ -48,6 +51,15 @@ jest.mock('@/components/prd/UploadPRDModal', () => ({ UploadPRDModal: () => null, })); +// Capture the props passed to StressTestModal so we can assert open state. +const stressTestModalProps: { open?: boolean } = {}; +jest.mock('@/components/prd/StressTestModal', () => ({ + StressTestModal: ({ open }: { open: boolean }) => { + stressTestModalProps.open = open; + return open ?
<div>stress-test-modal-open</div>
: null; + }, +})); + jest.mock('next/link', () => { const MockLink = ({ href, children }: { href: string; children: React.ReactNode }) => ( {children} @@ -151,3 +163,28 @@ describe('PrdPage — handleGenerateTasks', () => { }); }); }); + +describe('PrdPage — Stress Test wiring', () => { + beforeEach(() => { + jest.clearAllMocks(); + delete stressTestModalProps.open; + mockGetSelectedWorkspacePath.mockReturnValue(WORKSPACE); + setupSWR(); + }); + + it('renders the stress-test modal closed by default', () => { + render(); + expect(stressTestModalProps.open).toBe(false); + expect(screen.queryByText('stress-test-modal-open')).not.toBeInTheDocument(); + }); + + it('opens the stress-test modal when the button is clicked', async () => { + render(); + fireEvent.click(screen.getByRole('button', { name: /stress test/i })); + + await waitFor(() => { + expect(screen.getByText('stress-test-modal-open')).toBeInTheDocument(); + }); + expect(stressTestModalProps.open).toBe(true); + }); +}); diff --git a/web-ui/src/__tests__/components/prd/StressTestModal.test.tsx b/web-ui/src/__tests__/components/prd/StressTestModal.test.tsx new file mode 100644 index 00000000..123271d4 --- /dev/null +++ b/web-ui/src/__tests__/components/prd/StressTestModal.test.tsx @@ -0,0 +1,130 @@ +import React from 'react'; +import { render, screen } from '@testing-library/react'; +import userEvent from '@testing-library/user-event'; +import { StressTestModal } from '@/components/prd/StressTestModal'; +import { useStressTestStream } from '@/hooks/useStressTestStream'; +import type { UseStressTestStreamReturn } from '@/hooks/useStressTestStream'; + +// ResizeObserver is not available in jsdom +global.ResizeObserver = jest.fn().mockImplementation(() => ({ + observe: jest.fn(), + unobserve: jest.fn(), + disconnect: jest.fn(), +})); + +// Radix ScrollArea Viewport hides children in jsdom — render children directly +jest.mock('@/components/ui/scroll-area', () => ({ + ScrollArea: ({ children }: { children: 
React.ReactNode }) =>
<div>{children}</div>
, + ScrollBar: () => null, +})); + +jest.mock('@/hooks/useStressTestStream'); + +const mockUseStressTestStream = useStressTestStream as jest.MockedFunction< + typeof useStressTestStream +>; + +const WORKSPACE = '/home/user/project'; + +function mockHook(overrides: Partial = {}) { + const value: UseStressTestStreamReturn = { + status: 'idle', + lines: [], + result: null, + error: null, + start: jest.fn(), + reset: jest.fn(), + ...overrides, + }; + mockUseStressTestStream.mockReturnValue(value); + return value; +} + +beforeEach(() => { + jest.clearAllMocks(); +}); + +describe('StressTestModal', () => { + it('calls start() when opened', () => { + const hook = mockHook({ status: 'streaming' }); + render( + + ); + expect(hook.start).toHaveBeenCalled(); + }); + + it('shows the analyzing spinner while streaming', () => { + mockHook({ status: 'streaming', lines: ['✓ Extracted 3 goals'] }); + render( + + ); + expect(screen.getByText('Analyzing PRD...')).toBeInTheDocument(); + expect(screen.getByText('✓ Extracted 3 goals')).toBeInTheDocument(); + }); + + it('shows the ambiguity summary on completion', () => { + mockHook({ + status: 'complete', + lines: ['✓ Analysis complete — 2 ambiguities found'], + result: { + ambiguityCount: 2, + techSpecMarkdown: '# spec', + ambiguityReport: 'report', + }, + }); + render( + + ); + expect(screen.getByText('Found 2 ambiguities')).toBeInTheDocument(); + }); + + it('shows a well-specified message when no ambiguities are found', () => { + mockHook({ + status: 'complete', + result: { + ambiguityCount: 0, + techSpecMarkdown: '# spec', + ambiguityReport: 'report', + }, + }); + render( + + ); + expect( + screen.getByText(/No ambiguities found/i) + ).toBeInTheDocument(); + }); + + it('shows an error message and a working Retry button', async () => { + const hook = mockHook({ + status: 'error', + error: 'ANTHROPIC_API_KEY environment variable required.', + }); + render( + + ); + + expect(screen.getByText('Stress test failed')).toBeInTheDocument(); 
+ expect( + screen.getByText('ANTHROPIC_API_KEY environment variable required.') + ).toBeInTheDocument(); + + // start was called once on open; clicking Retry calls it again. + await userEvent.click(screen.getByRole('button', { name: 'Retry' })); + expect(hook.start).toHaveBeenCalledTimes(2); + }); + + it('closes via the Close button after completion', async () => { + const onOpenChange = jest.fn(); + mockHook({ + status: 'complete', + result: { ambiguityCount: 0, techSpecMarkdown: '', ambiguityReport: '' }, + }); + render( + + ); + + await userEvent.click(screen.getByRole('button', { name: 'Close' })); + expect(onOpenChange).toHaveBeenCalledWith(false); + }); +}); diff --git a/web-ui/src/__tests__/hooks/useStressTestStream.test.ts b/web-ui/src/__tests__/hooks/useStressTestStream.test.ts new file mode 100644 index 00000000..4e2a8865 --- /dev/null +++ b/web-ui/src/__tests__/hooks/useStressTestStream.test.ts @@ -0,0 +1,162 @@ +import { renderHook, act } from '@testing-library/react'; +import { useStressTestStream } from '@/hooks/useStressTestStream'; + +// ── EventSource mock ────────────────────────────────────────────────────── + +class MockEventSource { + static CONNECTING = 0; + static OPEN = 1; + static CLOSED = 2; + + static instances: MockEventSource[] = []; + + url: string; + readyState: number = MockEventSource.CONNECTING; + onopen: (() => void) | null = null; + onmessage: ((event: { data: string }) => void) | null = null; + onerror: ((event: unknown) => void) | null = null; + + constructor(url: string) { + this.url = url; + MockEventSource.instances.push(this); + } + + close() { + this.readyState = MockEventSource.CLOSED; + } + + // Test helpers + emit(payload: unknown) { + this.onmessage?.({ data: JSON.stringify(payload) }); + } + + static latest(): MockEventSource { + return MockEventSource.instances[MockEventSource.instances.length - 1]; + } +} + +beforeEach(() => { + MockEventSource.instances = []; + (global as unknown as { EventSource: unknown 
}).EventSource = MockEventSource; +}); + +const WORKSPACE = '/tmp/test-workspace'; + +describe('useStressTestStream', () => { + it('starts idle and does not open a connection', () => { + const { result } = renderHook(() => useStressTestStream(WORKSPACE)); + expect(result.current.status).toBe('idle'); + expect(MockEventSource.instances).toHaveLength(0); + }); + + it('opens a connection on start() and transitions to streaming', () => { + const { result } = renderHook(() => useStressTestStream(WORKSPACE)); + + act(() => { + result.current.start(); + }); + + expect(result.current.status).toBe('streaming'); + expect(MockEventSource.instances).toHaveLength(1); + expect(MockEventSource.latest().url).toContain('/api/v2/prd/stress-test'); + expect(MockEventSource.latest().url).toContain( + `workspace_path=${encodeURIComponent(WORKSPACE)}` + ); + }); + + it('accumulates human-readable lines from progress events', () => { + const { result } = renderHook(() => useStressTestStream(WORKSPACE)); + act(() => result.current.start()); + + act(() => { + MockEventSource.latest().emit({ + type: 'goals_extracted', + goals: ['Auth', 'Invoicing', 'Export'], + }); + }); + act(() => { + MockEventSource.latest().emit({ + type: 'goal_analyzed', + goal: 'Auth', + classification: 'ambiguous', + ambiguities_so_far: 1, + }); + }); + + expect(result.current.lines).toEqual([ + '✓ Extracted 3 goals', + '⚠ Auth — ambiguous', + ]); + expect(result.current.status).toBe('streaming'); + }); + + it('transitions to complete and exposes results', () => { + const { result } = renderHook(() => useStressTestStream(WORKSPACE)); + act(() => result.current.start()); + + act(() => { + MockEventSource.latest().emit({ + type: 'complete', + ambiguity_count: 2, + tech_spec_markdown: '# Technical Specification', + ambiguity_report: 'PRD Stress Test — 2 ambiguities found', + }); + }); + + expect(result.current.status).toBe('complete'); + expect(result.current.result).toEqual({ + ambiguityCount: 2, + techSpecMarkdown: '# 
Technical Specification', + ambiguityReport: 'PRD Stress Test — 2 ambiguities found', + }); + // Connection should be closed on completion to avoid a reconnect loop. + expect(MockEventSource.latest().readyState).toBe(MockEventSource.CLOSED); + }); + + it('transitions to error and captures the message', () => { + const { result } = renderHook(() => useStressTestStream(WORKSPACE)); + act(() => result.current.start()); + + act(() => { + MockEventSource.latest().emit({ + type: 'error', + message: 'ANTHROPIC_API_KEY environment variable required.', + }); + }); + + expect(result.current.status).toBe('error'); + expect(result.current.error).toBe( + 'ANTHROPIC_API_KEY environment variable required.' + ); + }); + + it('retries with a fresh connection after an error', () => { + const { result } = renderHook(() => useStressTestStream(WORKSPACE)); + act(() => result.current.start()); + act(() => { + MockEventSource.latest().emit({ type: 'error', message: 'boom' }); + }); + expect(result.current.status).toBe('error'); + + act(() => result.current.start()); + + expect(result.current.status).toBe('streaming'); + expect(result.current.error).toBeNull(); + // A second, distinct EventSource should have been created. 
+ expect(MockEventSource.instances.length).toBeGreaterThanOrEqual(2); + const urls = MockEventSource.instances.map((es) => es.url); + expect(new Set(urls).size).toBe(urls.length); + }); + + it('reset() closes the connection and returns to idle', () => { + const { result } = renderHook(() => useStressTestStream(WORKSPACE)); + act(() => result.current.start()); + const es = MockEventSource.latest(); + + act(() => result.current.reset()); + + expect(result.current.status).toBe('idle'); + expect(result.current.lines).toEqual([]); + expect(es.readyState).toBe(MockEventSource.CLOSED); + }); +}); diff --git a/web-ui/src/app/prd/page.tsx b/web-ui/src/app/prd/page.tsx index 9c8a7798..d3a0edc3 100644 --- a/web-ui/src/app/prd/page.tsx +++ b/web-ui/src/app/prd/page.tsx @@ -7,6 +7,7 @@ import useSWR from 'swr'; import { PRDView } from '@/components/prd'; import { UploadPRDModal } from '@/components/prd/UploadPRDModal'; import { PRDVersionHistoryModal } from '@/components/prd/PRDVersionHistoryModal'; +import { StressTestModal } from '@/components/prd/StressTestModal'; import { prdApi, tasksApi, discoveryApi } from '@/lib/api'; import { getSelectedWorkspacePath } from '@/lib/workspace-storage'; import type { @@ -23,6 +24,7 @@ export default function PrdPage() { const [isSaving, setIsSaving] = useState(false); const [isGeneratingTasks, setIsGeneratingTasks] = useState(false); const [versionHistoryOpen, setVersionHistoryOpen] = useState(false); + const [stressTestOpen, setStressTestOpen] = useState(false); useEffect(() => { setWorkspacePath(getSelectedWorkspacePath()); @@ -175,6 +177,7 @@ export default function PrdPage() { onSavePrd={handleSavePrd} onPrdGenerated={handlePrdGenerated} onViewHistory={() => setVersionHistoryOpen(true)} + onStressTest={() => setStressTestOpen(true)} /> )} + + ); diff --git a/web-ui/src/components/prd/PRDHeader.tsx b/web-ui/src/components/prd/PRDHeader.tsx index 16530fdf..a46d6886 100644 --- a/web-ui/src/components/prd/PRDHeader.tsx +++ 
b/web-ui/src/components/prd/PRDHeader.tsx @@ -5,6 +5,7 @@ import { Upload04Icon, MessageSearch01Icon, TaskEdit01Icon, + TestTube01Icon, Loading03Icon, Time01Icon, } from '@hugeicons/react'; @@ -18,6 +19,7 @@ interface PRDHeaderProps { onStartDiscovery: () => void; onGenerateTasks: () => void; onViewHistory?: () => void; + onStressTest?: () => void; } export function PRDHeader({ @@ -27,6 +29,7 @@ export function PRDHeader({ onStartDiscovery, onGenerateTasks, onViewHistory, + onStressTest, }: PRDHeaderProps) { return (
@@ -56,6 +59,17 @@ export function PRDHeader({ {prd ? 'Upload New' : 'Upload PRD'} + {onStressTest && ( + + )} + )} + + + + + ); +} diff --git a/web-ui/src/components/prd/index.ts b/web-ui/src/components/prd/index.ts index 95e4c488..7c9be3e0 100644 --- a/web-ui/src/components/prd/index.ts +++ b/web-ui/src/components/prd/index.ts @@ -7,3 +7,4 @@ export { DiscoveryTranscript } from './DiscoveryTranscript'; export { DiscoveryInput } from './DiscoveryInput'; export { AssociatedTasksSummary } from './AssociatedTasksSummary'; export { PRDVersionHistoryModal } from './PRDVersionHistoryModal'; +export { StressTestModal } from './StressTestModal'; diff --git a/web-ui/src/hooks/index.ts b/web-ui/src/hooks/index.ts index 7769dc9c..a3a01175 100644 --- a/web-ui/src/hooks/index.ts +++ b/web-ui/src/hooks/index.ts @@ -20,3 +20,15 @@ export { type ErrorEvent, type HeartbeatEvent, } from './useTaskStream'; +export { + useStressTestStream, + type UseStressTestStreamReturn, + type StressTestStatus, + type StressTestResultData, + type StressTestEvent, + type StressTestEventType, + type StressTestGoalsExtractedEvent, + type StressTestGoalAnalyzedEvent, + type StressTestCompleteEvent, + type StressTestErrorEvent, +} from './useStressTestStream'; diff --git a/web-ui/src/hooks/useStressTestStream.ts b/web-ui/src/hooks/useStressTestStream.ts new file mode 100644 index 00000000..402e7c33 --- /dev/null +++ b/web-ui/src/hooks/useStressTestStream.ts @@ -0,0 +1,178 @@ +'use client'; + +import { useCallback, useRef, useState } from 'react'; +import { useEventSource } from './useEventSource'; + +// ── Event types matching the backend stress_test_prd_stream payloads ────── + +export type StressTestEventType = + | 'goals_extracted' + | 'goal_analyzed' + | 'complete' + | 'error'; + +export interface StressTestGoalsExtractedEvent { + type: 'goals_extracted'; + goals: string[]; +} + +export interface StressTestGoalAnalyzedEvent { + type: 'goal_analyzed'; + goal: string; + classification: 'atomic' | 
'composite' | 'ambiguous'; + ambiguities_so_far: number; +} + +export interface StressTestCompleteEvent { + type: 'complete'; + ambiguity_count: number; + tech_spec_markdown: string; + ambiguity_report: string; +} + +export interface StressTestErrorEvent { + type: 'error'; + message: string; +} + +export type StressTestEvent = + | StressTestGoalsExtractedEvent + | StressTestGoalAnalyzedEvent + | StressTestCompleteEvent + | StressTestErrorEvent; + +// ── Hook state ──────────────────────────────────────────────────────────── + +export type StressTestStatus = 'idle' | 'streaming' | 'complete' | 'error'; + +/** Decomposition results, retained for the results view (issue #562). */ +export interface StressTestResultData { + ambiguityCount: number; + techSpecMarkdown: string; + ambiguityReport: string; +} + +export interface UseStressTestStreamReturn { + status: StressTestStatus; + /** Human-readable progress lines accumulated from incoming events. */ + lines: string[]; + result: StressTestResultData | null; + error: string | null; + /** Begin (or restart) the stress-test stream. */ + start: () => void; + /** Stop streaming and clear all state back to idle. */ + reset: () => void; +} + +function classificationIcon(classification: string): string { + return classification === 'ambiguous' ? '⚠' : '✓'; +} + +/** + * Subscribes to the PRD stress-test SSE stream at + * `GET /api/v2/prd/stress-test`, parsing JSON events into a small state + * machine (idle → streaming → complete | error) plus human-readable lines. + * + * Mirrors `useTaskStream`: connects directly to `NEXT_PUBLIC_SSE_URL` + * because the Next.js rewrite proxy buffers chunked responses and would + * prevent SSE events from streaming incrementally. 
+ */ +export function useStressTestStream( + workspacePath: string | null +): UseStressTestStreamReturn { + const [status, setStatus] = useState('idle'); + const [lines, setLines] = useState([]); + const [result, setResult] = useState(null); + const [error, setError] = useState(null); + const [active, setActive] = useState(false); + // Bumped on every start() so a retry produces a fresh URL — useEventSource + // keys off the URL string, so reusing it verbatim would not reconnect. + const [runId, setRunId] = useState(0); + + // Ref to close() so the message handler can stop the stream on a terminal + // event without a stale closure (close is created after handleMessage). + const closeRef = useRef<() => void>(() => {}); + + const sseBase = process.env.NEXT_PUBLIC_SSE_URL || 'http://localhost:8000'; + const url = + active && workspacePath + ? `${sseBase}/api/v2/prd/stress-test?workspace_path=${encodeURIComponent(workspacePath)}&run=${runId}` + : null; + + const handleMessage = useCallback((data: string) => { + let event: StressTestEvent; + try { + event = JSON.parse(data) as StressTestEvent; + } catch { + // Ignore malformed messages (e.g. SSE comment heartbeats) + return; + } + + switch (event.type) { + case 'goals_extracted': { + const n = event.goals.length; + setLines((prev) => [ + ...prev, + `✓ Extracted ${n} goal${n === 1 ? '' : 's'}`, + ]); + break; + } + case 'goal_analyzed': + setLines((prev) => [ + ...prev, + `${classificationIcon(event.classification)} ${event.goal} — ${event.classification}`, + ]); + break; + case 'complete': { + const n = event.ambiguity_count; + setLines((prev) => [ + ...prev, + `✓ Analysis complete — ${n} ambiguit${n === 1 ? 'y' : 'ies'} found`, + ]); + setResult({ + ambiguityCount: event.ambiguity_count, + techSpecMarkdown: event.tech_spec_markdown, + ambiguityReport: event.ambiguity_report, + }); + setStatus('complete'); + // Server closes after this; close ourselves to avoid a reconnect loop. 
+ closeRef.current(); + break; + } + case 'error': + setError(event.message); + setStatus('error'); + closeRef.current(); + break; + } + }, []); + + const { close } = useEventSource({ + url, + onMessage: handleMessage, + // The stress-test is a one-shot stream; don't auto-reconnect when the + // server closes the connection on completion. + maxRetries: 0, + }); + closeRef.current = close; + + const start = useCallback(() => { + setLines([]); + setResult(null); + setError(null); + setStatus('streaming'); + setRunId((id) => id + 1); + setActive(true); + }, []); + + const reset = useCallback(() => { + close(); + setActive(false); + setStatus('idle'); + setLines([]); + setResult(null); + setError(null); + }, [close]); + + return { status, lines, result, error, start, reset }; +} From 18ef2191680b80528d193d43e0b107d39b3608ba Mon Sep 17 00:00:00 2001 From: Test User Date: Tue, 26 May 2026 17:46:48 -0700 Subject: [PATCH 2/5] fix(prd): handle SSE transport failures and client disconnect (#561) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Address cross-family (codex) review: - Frontend: useStressTestStream now observes useEventSource onError, so a transport-level failure (server down, 404/CORS, dropped connection) with no data frame transitions the modal to the error state instead of hanging on 'Analyzing PRD...'. Guarded to ignore transient (non-CLOSED) reconnect errors and to not clobber a backend-sent error event. - Backend: extract the SSE generator to _stress_test_event_stream and check request.is_disconnected() between events so an abandoned stream stops issuing further (billable) LLM calls — mirroring event_stream_generator. 
--- codeframe/ui/routers/prd_v2.py | 89 +++++++++++-------- tests/ui/test_prd_stress_test_router.py | 67 ++++++++++++++ .../hooks/useStressTestStream.test.ts | 48 ++++++++++ web-ui/src/hooks/useStressTestStream.ts | 24 +++++ 4 files changed, 192 insertions(+), 36 deletions(-) diff --git a/codeframe/ui/routers/prd_v2.py b/codeframe/ui/routers/prd_v2.py index 5c2bd8a8..e28dcc29 100644 --- a/codeframe/ui/routers/prd_v2.py +++ b/codeframe/ui/routers/prd_v2.py @@ -189,6 +189,58 @@ async def get_latest_prd( return _prd_to_response(record) +def _sse(event: dict) -> str: + """Format a stress-test event dict as an SSE ``data:`` frame.""" + return f"data: {json.dumps(event)}\n\n" + + +async def _stress_test_event_stream( + workspace: Workspace, + max_depth: int, + request: Optional[Request] = None, +) -> AsyncGenerator[str, None]: + """Yield SSE frames for a PRD stress-test. + + Recoverable problems (missing PRD, missing ``ANTHROPIC_API_KEY``) are + surfaced as in-stream ``error`` events rather than HTTP errors, so a + browser ``EventSource`` can display them via its message handler. + + Stops early if the client disconnects, so an abandoned stream does not keep + issuing LLM calls — mirroring ``event_stream_generator`` in streaming_v2. + """ + from codeframe.core.prd_stress_test import stress_test_prd_stream + + record = prd.get_latest(workspace) + if not record: + yield _sse({ + "type": "error", + "message": "No PRD found. 
Add or generate a PRD first.", + }) + return + + api_key = os.getenv("ANTHROPIC_API_KEY") + if not api_key: + yield _sse({ + "type": "error", + "message": "ANTHROPIC_API_KEY environment variable required.", + }) + return + + from codeframe.adapters.llm.anthropic import AnthropicProvider + + provider = AnthropicProvider(api_key=api_key) + + async for event in stress_test_prd_stream( + record.content, provider, max_depth=max_depth, + ): + # If the browser has gone away, stop iterating the core generator so its + # next (blocking, billable) LLM call is never made. + if request is not None and await request.is_disconnected(): + logger.info("Client disconnected from stress-test stream; aborting") + break + yield _sse(event) + + @router.get("/stress-test") async def stress_test_prd_stream_endpoint( request: Request, @@ -211,44 +263,9 @@ async def stress_test_prd_stream_endpoint( ambiguity count) - ``complete``: ambiguity count + rendered tech spec / ambiguity report - ``error``: no PRD, missing API key, or decomposition failure - - Recoverable problems (missing PRD, missing ``ANTHROPIC_API_KEY``) are - surfaced as in-stream ``error`` events rather than HTTP errors, so the - browser ``EventSource`` can display them via its message handler. """ - from codeframe.core.prd_stress_test import stress_test_prd_stream - - def _sse(event: dict) -> str: - return f"data: {json.dumps(event)}\n\n" - - async def _generate() -> AsyncGenerator[str, None]: - record = prd.get_latest(workspace) - if not record: - yield _sse({ - "type": "error", - "message": "No PRD found. 
Add or generate a PRD first.", - }) - return - - api_key = os.getenv("ANTHROPIC_API_KEY") - if not api_key: - yield _sse({ - "type": "error", - "message": "ANTHROPIC_API_KEY environment variable required.", - }) - return - - from codeframe.adapters.llm.anthropic import AnthropicProvider - - provider = AnthropicProvider(api_key=api_key) - - async for event in stress_test_prd_stream( - record.content, provider, max_depth=max_depth, - ): - yield _sse(event) - return StreamingResponse( - _generate(), + _stress_test_event_stream(workspace, max_depth, request), media_type="text/event-stream", headers={ "Cache-Control": "no-cache", diff --git a/tests/ui/test_prd_stress_test_router.py b/tests/ui/test_prd_stress_test_router.py index a98041b8..1cdd60f8 100644 --- a/tests/ui/test_prd_stress_test_router.py +++ b/tests/ui/test_prd_stress_test_router.py @@ -156,3 +156,70 @@ def test_missing_api_key_emits_error_event(self, test_client, monkeypatch): events = _parse_sse(response.text) assert events[-1]["type"] == "error" assert "ANTHROPIC_API_KEY" in events[-1]["message"] + + +class TestStressTestDisconnect: + """The stream must stop issuing LLM calls once the client disconnects.""" + + async def test_aborts_when_client_disconnects( + self, test_workspace, mock_provider, monkeypatch + ): + import codeframe.adapters.llm.anthropic as anthropic_mod + from codeframe.ui.routers.prd_v2 import _stress_test_event_stream + + monkeypatch.setenv("ANTHROPIC_API_KEY", "sk-ant-test-fake") + monkeypatch.setattr( + anthropic_mod, "AnthropicProvider", lambda *a, **k: mock_provider + ) + prd_module.store(test_workspace, SAMPLE_PRD, "Invoice SaaS", {}) + + class FakeRequest: + """Reports connected for the first event, then disconnected.""" + + def __init__(self): + self.calls = 0 + + async def is_disconnected(self): + self.calls += 1 + return self.calls > 1 + + frames = [ + frame + async for frame in _stress_test_event_stream( + test_workspace, max_depth=3, request=FakeRequest() + ) + ] + + # Only 
the first frame (goals_extracted) is emitted before the + # disconnect is detected; no `complete` frame is sent. + types = [json.loads(f[len("data:"):].strip())["type"] for f in frames] + assert types == ["goals_extracted"] + # The abort stops further decomposition: a full run of the 3-goal sample + # PRD would make 6 LLM calls (extract + auth + invoice + 2 children + + # pdf); aborting after the first goal makes far fewer. + assert mock_provider.complete.call_count < 6 + + async def test_completes_when_client_stays_connected( + self, test_workspace, mock_provider, monkeypatch + ): + import codeframe.adapters.llm.anthropic as anthropic_mod + from codeframe.ui.routers.prd_v2 import _stress_test_event_stream + + monkeypatch.setenv("ANTHROPIC_API_KEY", "sk-ant-test-fake") + monkeypatch.setattr( + anthropic_mod, "AnthropicProvider", lambda *a, **k: mock_provider + ) + prd_module.store(test_workspace, SAMPLE_PRD, "Invoice SaaS", {}) + + class ConnectedRequest: + async def is_disconnected(self): + return False + + frames = [ + frame + async for frame in _stress_test_event_stream( + test_workspace, max_depth=3, request=ConnectedRequest() + ) + ] + types = [json.loads(f[len("data:"):].strip())["type"] for f in frames] + assert types[-1] == "complete" diff --git a/web-ui/src/__tests__/hooks/useStressTestStream.test.ts b/web-ui/src/__tests__/hooks/useStressTestStream.test.ts index 4e2a8865..9c9b6c8e 100644 --- a/web-ui/src/__tests__/hooks/useStressTestStream.test.ts +++ b/web-ui/src/__tests__/hooks/useStressTestStream.test.ts @@ -30,6 +30,12 @@ class MockEventSource { this.onmessage?.({ data: JSON.stringify(payload) }); } + /** Simulate a transport-level error. Pass the resulting readyState. 
*/ + emitError(readyState: number = MockEventSource.CLOSED) { + this.readyState = readyState; + this.onerror?.({ target: this }); + } + static latest(): MockEventSource { return MockEventSource.instances[MockEventSource.instances.length - 1]; } @@ -148,6 +154,48 @@ describe('useStressTestStream', () => { expect(new Set(urls).size).toBe(urls.length); }); + it('reports a transport failure (closed connection, no data) as an error', () => { + const { result } = renderHook(() => useStressTestStream(WORKSPACE)); + act(() => result.current.start()); + + // EventSource fails before any `data:` frame and ends up CLOSED. + act(() => { + MockEventSource.latest().emitError(MockEventSource.CLOSED); + }); + + expect(result.current.status).toBe('error'); + expect(result.current.error).toMatch(/connection to the stress-test stream failed/i); + }); + + it('ignores transient (non-closed) connection errors while streaming', () => { + const { result } = renderHook(() => useStressTestStream(WORKSPACE)); + act(() => result.current.start()); + + // A transient error where the browser will reconnect (readyState CONNECTING). + act(() => { + MockEventSource.latest().emitError(MockEventSource.CONNECTING); + }); + + expect(result.current.status).toBe('streaming'); + expect(result.current.error).toBeNull(); + }); + + it('does not overwrite a backend error event with a transport error', () => { + const { result } = renderHook(() => useStressTestStream(WORKSPACE)); + act(() => result.current.start()); + + act(() => { + MockEventSource.latest().emit({ type: 'error', message: 'boom from server' }); + }); + // Server then closes the connection, firing onerror — must not clobber. 
+ act(() => { + MockEventSource.latest().emitError(MockEventSource.CLOSED); + }); + + expect(result.current.status).toBe('error'); + expect(result.current.error).toBe('boom from server'); + }); + it('reset() closes the connection and returns to idle', () => { const { result } = renderHook(() => useStressTestStream(WORKSPACE)); act(() => result.current.start()); diff --git a/web-ui/src/hooks/useStressTestStream.ts b/web-ui/src/hooks/useStressTestStream.ts index 402e7c33..3e2e0a3d 100644 --- a/web-ui/src/hooks/useStressTestStream.ts +++ b/web-ui/src/hooks/useStressTestStream.ts @@ -92,6 +92,10 @@ export function useStressTestStream( // Ref to close() so the message handler can stop the stream on a terminal // event without a stale closure (close is created after handleMessage). const closeRef = useRef<() => void>(() => {}); + // Tracks whether a terminal data event (complete/error) was received, so a + // transport-level error fired afterward (e.g. the server closing the stream) + // is not misreported as a connection failure. + const terminalRef = useRef(false); const sseBase = process.env.NEXT_PUBLIC_SSE_URL || 'http://localhost:8000'; const url = @@ -134,12 +138,14 @@ export function useStressTestStream( techSpecMarkdown: event.tech_spec_markdown, ambiguityReport: event.ambiguity_report, }); + terminalRef.current = true; setStatus('complete'); // Server closes after this; close ourselves to avoid a reconnect loop. closeRef.current(); break; } case 'error': + terminalRef.current = true; setError(event.message); setStatus('error'); closeRef.current(); @@ -147,9 +153,26 @@ export function useStressTestStream( } }, []); + // Surface transport-level failures (server down, 404/CORS, dropped + // connection) that arrive without any `data:` frame. Without this the modal + // would stay on "Analyzing PRD..." forever. 
Only act on a CLOSED connection + // so the browser's own transient-reconnect attempts aren't reported as + // failures, and only when no terminal data event has been received. + const handleError = useCallback((event: Event) => { + if (terminalRef.current) return; + const es = event.target as EventSource | null; + if (es && es.readyState !== EventSource.CLOSED) return; + setError( + (prev) => + prev ?? 'Connection to the stress-test stream failed. Please try again.' + ); + setStatus('error'); + }, []); + const { close } = useEventSource({ url, onMessage: handleMessage, + onError: handleError, // The stress-test is a one-shot stream; don't auto-reconnect when the // server closes the connection on completion. maxRetries: 0, @@ -157,6 +180,7 @@ export function useStressTestStream( closeRef.current = close; const start = useCallback(() => { + terminalRef.current = false; setLines([]); setResult(null); setError(null); From 802ecdd2a8dfc4d5bf0507da95e2cef930e088c5 Mon Sep 17 00:00:00 2001 From: Test User Date: Tue, 26 May 2026 17:58:33 -0700 Subject: [PATCH 3/5] refactor(prd): address review feedback on stress-test UI (#561) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add @rate_limit_standard() to the stress-test SSE endpoint (it runs recursive LLM decomposition — throttle burst abuse), matching the rest of prd_v2. (coderabbit Major / claude High) - Move SSE event payload types to web-ui/src/types/prd.ts (re-exported from types/index.ts) per the repo's type-location guideline; the hook imports them. (coderabbit Major) - Guard useStressTestStream.start() when workspacePath is null — fail fast to the error state instead of hanging in 'streaming'. (coderabbit) - Drop the redundant ScrollArea wrapper in StressTestModal so the auto-scroll ref and the visible scrollbar are the same element. (claude Medium) - Tighten the disconnect-abort test assertion to <= 2 calls (this fixture's full run is 4). 
(coderabbit) --- codeframe/ui/routers/prd_v2.py | 1 + tests/ui/test_prd_stress_test_router.py | 8 ++-- .../hooks/useStressTestStream.test.ts | 10 ++++ web-ui/src/components/prd/StressTestModal.tsx | 26 +++++----- web-ui/src/hooks/index.ts | 6 --- web-ui/src/hooks/useStressTestStream.ts | 47 ++++--------------- web-ui/src/types/index.ts | 6 +++ web-ui/src/types/prd.ts | 41 ++++++++++++++++ 8 files changed, 82 insertions(+), 63 deletions(-) diff --git a/codeframe/ui/routers/prd_v2.py b/codeframe/ui/routers/prd_v2.py index e28dcc29..76bbd45f 100644 --- a/codeframe/ui/routers/prd_v2.py +++ b/codeframe/ui/routers/prd_v2.py @@ -242,6 +242,7 @@ async def _stress_test_event_stream( @router.get("/stress-test") +@rate_limit_standard() async def stress_test_prd_stream_endpoint( request: Request, max_depth: int = Query(3, ge=1, le=10, description="Maximum recursion depth"), diff --git a/tests/ui/test_prd_stress_test_router.py b/tests/ui/test_prd_stress_test_router.py index 1cdd60f8..22413c09 100644 --- a/tests/ui/test_prd_stress_test_router.py +++ b/tests/ui/test_prd_stress_test_router.py @@ -194,10 +194,10 @@ async def is_disconnected(self): # disconnect is detected; no `complete` frame is sent. types = [json.loads(f[len("data:"):].strip())["type"] for f in frames] assert types == ["goals_extracted"] - # The abort stops further decomposition: a full run of the 3-goal sample - # PRD would make 6 LLM calls (extract + auth + invoice + 2 children + - # pdf); aborting after the first goal makes far fewer. - assert mock_provider.complete.call_count < 6 + # The abort stops further decomposition. A full run of this fixture is 4 + # calls (extract + 3 atomic goals); aborting after goal 1 is at most 2 + # (extract + first goal's classification). 
+ assert mock_provider.complete.call_count <= 2 async def test_completes_when_client_stays_connected( self, test_workspace, mock_provider, monkeypatch diff --git a/web-ui/src/__tests__/hooks/useStressTestStream.test.ts b/web-ui/src/__tests__/hooks/useStressTestStream.test.ts index 9c9b6c8e..ea97f0a0 100644 --- a/web-ui/src/__tests__/hooks/useStressTestStream.test.ts +++ b/web-ui/src/__tests__/hooks/useStressTestStream.test.ts @@ -196,6 +196,16 @@ describe('useStressTestStream', () => { expect(result.current.error).toBe('boom from server'); }); + it('fails fast (no connection) when workspacePath is null', () => { + const { result } = renderHook(() => useStressTestStream(null)); + + act(() => result.current.start()); + + expect(result.current.status).toBe('error'); + expect(result.current.error).toMatch(/no workspace selected/i); + expect(MockEventSource.instances).toHaveLength(0); + }); + it('reset() closes the connection and returns to idle', () => { const { result } = renderHook(() => useStressTestStream(WORKSPACE)); act(() => result.current.start()); diff --git a/web-ui/src/components/prd/StressTestModal.tsx b/web-ui/src/components/prd/StressTestModal.tsx index 1e05277a..a53b627a 100644 --- a/web-ui/src/components/prd/StressTestModal.tsx +++ b/web-ui/src/components/prd/StressTestModal.tsx @@ -15,7 +15,6 @@ import { DialogDescription, } from '@/components/ui/dialog'; import { Button } from '@/components/ui/button'; -import { ScrollArea } from '@/components/ui/scroll-area'; import { useStressTestStream } from '@/hooks/useStressTestStream'; interface StressTestModalProps { @@ -90,20 +89,19 @@ export function StressTestModal({ )} - {/* Streaming log */} + {/* Streaming log — single scroll container so the auto-scroll ref + (scrollTop) and the visible scrollbar are the same element. */} {(status === 'streaming' || status === 'complete') && lines.length > 0 && ( - -
- {lines.map((line, i) => ( -
- {line} -
- ))} -
-
+
+ {lines.map((line, i) => ( +
+ {line} +
+ ))} +
)} {/* Error state */} diff --git a/web-ui/src/hooks/index.ts b/web-ui/src/hooks/index.ts index a3a01175..ceb82e07 100644 --- a/web-ui/src/hooks/index.ts +++ b/web-ui/src/hooks/index.ts @@ -25,10 +25,4 @@ export { type UseStressTestStreamReturn, type StressTestStatus, type StressTestResultData, - type StressTestEvent, - type StressTestEventType, - type StressTestGoalsExtractedEvent, - type StressTestGoalAnalyzedEvent, - type StressTestCompleteEvent, - type StressTestErrorEvent, } from './useStressTestStream'; diff --git a/web-ui/src/hooks/useStressTestStream.ts b/web-ui/src/hooks/useStressTestStream.ts index 3e2e0a3d..b0aac627 100644 --- a/web-ui/src/hooks/useStressTestStream.ts +++ b/web-ui/src/hooks/useStressTestStream.ts @@ -2,44 +2,7 @@ import { useCallback, useRef, useState } from 'react'; import { useEventSource } from './useEventSource'; - -// ── Event types matching the backend stress_test_prd_stream payloads ────── - -export type StressTestEventType = - | 'goals_extracted' - | 'goal_analyzed' - | 'complete' - | 'error'; - -export interface StressTestGoalsExtractedEvent { - type: 'goals_extracted'; - goals: string[]; -} - -export interface StressTestGoalAnalyzedEvent { - type: 'goal_analyzed'; - goal: string; - classification: 'atomic' | 'composite' | 'ambiguous'; - ambiguities_so_far: number; -} - -export interface StressTestCompleteEvent { - type: 'complete'; - ambiguity_count: number; - tech_spec_markdown: string; - ambiguity_report: string; -} - -export interface StressTestErrorEvent { - type: 'error'; - message: string; -} - -export type StressTestEvent = - | StressTestGoalsExtractedEvent - | StressTestGoalAnalyzedEvent - | StressTestCompleteEvent - | StressTestErrorEvent; +import type { StressTestEvent } from '@/types'; // ── Hook state ──────────────────────────────────────────────────────────── @@ -184,10 +147,16 @@ export function useStressTestStream( setLines([]); setResult(null); setError(null); + if (!workspacePath) { + // No URL can be built — 
fail fast instead of hanging in 'streaming'. + setError('No workspace selected.'); + setStatus('error'); + return; + } setStatus('streaming'); setRunId((id) => id + 1); setActive(true); - }, []); + }, [workspacePath]); const reset = useCallback(() => { close(); diff --git a/web-ui/src/types/index.ts b/web-ui/src/types/index.ts index 1b0c06d8..a9187928 100644 --- a/web-ui/src/types/index.ts +++ b/web-ui/src/types/index.ts @@ -16,6 +16,12 @@ export type { DiscoveryMessage, DiscoveryRole, DiscoveryState, + StressTestEventType, + StressTestGoalsExtractedEvent, + StressTestGoalAnalyzedEvent, + StressTestCompleteEvent, + StressTestErrorEvent, + StressTestEvent, } from './prd'; // Workspace types diff --git a/web-ui/src/types/prd.ts b/web-ui/src/types/prd.ts index 772dca4e..bf7bae70 100644 --- a/web-ui/src/types/prd.ts +++ b/web-ui/src/types/prd.ts @@ -94,3 +94,44 @@ export interface DiscoveryMessage { } export type DiscoveryState = 'idle' | 'discovering' | 'completed'; + +// --------------------------------------------------------------------------- +// PRD stress-test SSE event types (mirror stress_test_prd_stream in +// codeframe/core/prd_stress_test.py, streamed by GET /api/v2/prd/stress-test) +// --------------------------------------------------------------------------- + +export type StressTestEventType = + | 'goals_extracted' + | 'goal_analyzed' + | 'complete' + | 'error'; + +export interface StressTestGoalsExtractedEvent { + type: 'goals_extracted'; + goals: string[]; +} + +export interface StressTestGoalAnalyzedEvent { + type: 'goal_analyzed'; + goal: string; + classification: 'atomic' | 'composite' | 'ambiguous'; + ambiguities_so_far: number; +} + +export interface StressTestCompleteEvent { + type: 'complete'; + ambiguity_count: number; + tech_spec_markdown: string; + ambiguity_report: string; +} + +export interface StressTestErrorEvent { + type: 'error'; + message: string; +} + +export type StressTestEvent = + | StressTestGoalsExtractedEvent + | 
StressTestGoalAnalyzedEvent + | StressTestCompleteEvent + | StressTestErrorEvent; From d4d92674f71ff3a2bfa8c9bcd57f25d61094d24b Mon Sep 17 00:00:00 2001 From: Test User Date: Tue, 26 May 2026 18:01:08 -0700 Subject: [PATCH 4/5] refactor(prd): resolve LLM provider via config chain for stress-test (#561) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Address coderabbit Major: the SSE endpoint hardcoded AnthropicProvider. Now resolve provider via the documented chain (CODEFRAME_LLM_PROVIDER env → .codeframe/config.yaml → default anthropic) using the shared get_provider factory, mirroring runtime.py. The ANTHROPIC_API_KEY gate only applies when the resolved provider is anthropic, so OpenAI-compatible and local (ollama/vllm) providers work. Unknown provider types surface as in-stream error events. --- codeframe/ui/routers/prd_v2.py | 37 ++++++++++++++++++++++--- tests/ui/test_prd_stress_test_router.py | 36 +++++++++++++++++------- 2 files changed, 59 insertions(+), 14 deletions(-) diff --git a/codeframe/ui/routers/prd_v2.py b/codeframe/ui/routers/prd_v2.py index 76bbd45f..124e1cbb 100644 --- a/codeframe/ui/routers/prd_v2.py +++ b/codeframe/ui/routers/prd_v2.py @@ -218,17 +218,46 @@ async def _stress_test_event_stream( }) return - api_key = os.getenv("ANTHROPIC_API_KEY") - if not api_key: + # Resolve the LLM provider following the documented chain: + # env var → workspace config (.codeframe/config.yaml) → default "anthropic". + # (No CLI flag here — this is the web surface.) Mirrors runtime.py. 
+ from codeframe.adapters.llm import get_provider + from codeframe.core.config import load_environment_config + + env_cfg = load_environment_config(workspace.repo_path) + llm_cfg = env_cfg.llm if (env_cfg and env_cfg.llm) else None + provider_type = ( + os.getenv("CODEFRAME_LLM_PROVIDER") + or (llm_cfg.provider if llm_cfg else None) + or "anthropic" + ) + + # Only the Anthropic provider needs an API key up front; local providers + # (ollama/vllm/compatible) do not. + if provider_type == "anthropic" and not os.getenv("ANTHROPIC_API_KEY"): yield _sse({ "type": "error", "message": "ANTHROPIC_API_KEY environment variable required.", }) return - from codeframe.adapters.llm.anthropic import AnthropicProvider + provider_kwargs: dict = {} + model_override = os.getenv("CODEFRAME_LLM_MODEL") or ( + llm_cfg.model if llm_cfg else None + ) + base_url_override = (llm_cfg.base_url if llm_cfg else None) or os.getenv( + "OPENAI_BASE_URL" + ) + if model_override: + provider_kwargs["model"] = model_override + if base_url_override: + provider_kwargs["base_url"] = base_url_override - provider = AnthropicProvider(api_key=api_key) + try: + provider = get_provider(provider_type, **provider_kwargs) + except ValueError as exc: + yield _sse({"type": "error", "message": str(exc)}) + return async for event in stress_test_prd_stream( record.content, provider, max_depth=max_depth, diff --git a/tests/ui/test_prd_stress_test_router.py b/tests/ui/test_prd_stress_test_router.py index 22413c09..e32885ff 100644 --- a/tests/ui/test_prd_stress_test_router.py +++ b/tests/ui/test_prd_stress_test_router.py @@ -115,12 +115,12 @@ def _parse_sse(text: str) -> list[dict]: class TestStressTestEndpoint: - @patch("codeframe.adapters.llm.anthropic.AnthropicProvider") + @patch("codeframe.adapters.llm.get_provider") def test_streams_event_sequence( - self, mock_provider_cls, test_client, mock_provider, monkeypatch + self, mock_get_provider, test_client, mock_provider, monkeypatch ): 
monkeypatch.setenv("ANTHROPIC_API_KEY", "sk-ant-test-fake") - mock_provider_cls.return_value = mock_provider + mock_get_provider.return_value = mock_provider prd_module.store(test_client.workspace, SAMPLE_PRD, "Invoice SaaS", {}) response = test_client.get("/api/v2/prd/stress-test") @@ -134,12 +134,12 @@ def test_streams_event_sequence( assert types.count("goal_analyzed") == 3 assert events[-1]["ambiguity_count"] == 1 - @patch("codeframe.adapters.llm.anthropic.AnthropicProvider") + @patch("codeframe.adapters.llm.get_provider") def test_no_prd_emits_error_event( - self, mock_provider_cls, test_client, mock_provider, monkeypatch + self, mock_get_provider, test_client, mock_provider, monkeypatch ): monkeypatch.setenv("ANTHROPIC_API_KEY", "sk-ant-test-fake") - mock_provider_cls.return_value = mock_provider + mock_get_provider.return_value = mock_provider response = test_client.get("/api/v2/prd/stress-test") assert response.status_code == 200 @@ -157,6 +157,24 @@ def test_missing_api_key_emits_error_event(self, test_client, monkeypatch): assert events[-1]["type"] == "error" assert "ANTHROPIC_API_KEY" in events[-1]["message"] + @patch("codeframe.adapters.llm.get_provider") + def test_non_anthropic_provider_does_not_require_anthropic_key( + self, mock_get_provider, test_client, mock_provider, monkeypatch + ): + # A local/OpenAI-compatible provider is selected via env; the Anthropic + # key gate must not apply and the stream should run to completion. + monkeypatch.delenv("ANTHROPIC_API_KEY", raising=False) + monkeypatch.setenv("CODEFRAME_LLM_PROVIDER", "ollama") + mock_get_provider.return_value = mock_provider + prd_module.store(test_client.workspace, SAMPLE_PRD, "Invoice SaaS", {}) + + response = test_client.get("/api/v2/prd/stress-test") + assert response.status_code == 200 + events = _parse_sse(response.text) + assert events[-1]["type"] == "complete" + # Provider was resolved via the chain, not hardcoded to Anthropic. 
+ assert mock_get_provider.call_args.args[0] == "ollama" + class TestStressTestDisconnect: """The stream must stop issuing LLM calls once the client disconnects.""" @@ -164,12 +182,11 @@ class TestStressTestDisconnect: async def test_aborts_when_client_disconnects( self, test_workspace, mock_provider, monkeypatch ): - import codeframe.adapters.llm.anthropic as anthropic_mod from codeframe.ui.routers.prd_v2 import _stress_test_event_stream monkeypatch.setenv("ANTHROPIC_API_KEY", "sk-ant-test-fake") monkeypatch.setattr( - anthropic_mod, "AnthropicProvider", lambda *a, **k: mock_provider + "codeframe.adapters.llm.get_provider", lambda *a, **k: mock_provider ) prd_module.store(test_workspace, SAMPLE_PRD, "Invoice SaaS", {}) @@ -202,12 +219,11 @@ async def is_disconnected(self): async def test_completes_when_client_stays_connected( self, test_workspace, mock_provider, monkeypatch ): - import codeframe.adapters.llm.anthropic as anthropic_mod from codeframe.ui.routers.prd_v2 import _stress_test_event_stream monkeypatch.setenv("ANTHROPIC_API_KEY", "sk-ant-test-fake") monkeypatch.setattr( - anthropic_mod, "AnthropicProvider", lambda *a, **k: mock_provider + "codeframe.adapters.llm.get_provider", lambda *a, **k: mock_provider ) prd_module.store(test_workspace, SAMPLE_PRD, "Invoice SaaS", {}) From 2015b2508ec2ff6194a2c8cd3ef3ad6f0971c788 Mon Sep 17 00:00:00 2001 From: Test User Date: Tue, 26 May 2026 18:09:38 -0700 Subject: [PATCH 5/5] docs: sync for PRD stress-test web UI (#561) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Mark Phase 5.4 trigger+streaming shipped in CLAUDE.md, PRODUCT_ROADMAP, and the CLI↔API mapping; note results rendering (#562) still pending. 
--- CLAUDE.md | 2 ++ docs/PHASE_2_CLI_API_MAPPING.md | 3 ++- docs/PRODUCT_ROADMAP.md | 7 +++---- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/CLAUDE.md b/CLAUDE.md index 1b10106d..faabb651 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -36,6 +36,8 @@ If you are an agent working in this repo: **do not improvise architecture**. Fol ### Current Focus: Phase 4A +**Phase 5.4 is complete** — PRD stress-test web UI: trigger + streaming (#561). Backend: `GET /api/v2/prd/stress-test` SSE endpoint streams `goals_extracted`, `goal_analyzed`, `complete`, and `error` events from `core/prd_stress_test.py:stress_test_prd_stream()`, resolving the LLM provider via the standard chain and applying the standard rate limit. Frontend: `useStressTestStream` hook manages the SSE connection and event accumulation; `StressTestModal` renders the streaming progress and is opened via a "Stress Test" button on the `/prd` page (enabled only when a PRD exists). Results rendering (#562) is out of scope and still pending. + **Phase 5.3 is complete** — Async notifications cover both surfaces: - **Browser + in-app center (#559)**: `useNotifications` hook with workspace-scoped `localStorage` persistence and browser Notification dispatch (only when tab hidden + permission granted); `NotificationProvider` in root layout; `NotificationCenter` (bell icon + dropdown) mounts in sidebar footer. `BatchExecutionMonitor` dispatches `batch.completed` on terminal status transitions (distinguishing COMPLETED/FAILED/CANCELLED in both the in-app message and the success icon) and `blocker.created` on per-task BLOCKED transitions. `/execution` requests browser permission once on mount when permission is `'default'`. `/proof` dispatches `gate.run.failed` per failed gate when a proof run completes with `passed === false`. Known limitation: notifications only fire while `BatchExecutionMonitor` is mounted (cross-page background poller is out of scope; tracked for future work). 
- **Outbound webhook (#560)**: Settings → Notifications tab takes a single URL + enabled toggle, persisted to `.codeframe/notifications_config.json` via `atomic_write_json`. `GET/PUT /api/v2/settings/notifications` and `POST /api/v2/settings/notifications/test` (test fires a sample payload and surfaces status code). `WebhookNotificationService.send_event` is the generic backend; dispatched fire-and-forget (5s timeout) from `core/conductor.py` on `BATCH_COMPLETED` only (not PARTIAL/FAILED/CANCELLED), `core/blockers.py:create()` after `BLOCKER_CREATED`, and `ui/routers/pr_v2.py:merge_pull_request` after successful merge. Failures are logged but never break the triggering operation. diff --git a/docs/PHASE_2_CLI_API_MAPPING.md b/docs/PHASE_2_CLI_API_MAPPING.md index 713654b6..93c5834b 100644 --- a/docs/PHASE_2_CLI_API_MAPPING.md +++ b/docs/PHASE_2_CLI_API_MAPPING.md @@ -38,8 +38,9 @@ Both end up with PRD records managed by `core.prd`. | `cf prd export` | `core.prd` | `export_to_file()` | (CLI-only) | - | N/A | | `cf prd versions` | `core.prd` | `get_versions()` | `/api/v2/prd/{id}/versions` | GET | ✅ Present | | `cf prd diff` | `core.prd` | `diff_versions()` | `/api/v2/prd/{id}/diff` | GET | ✅ Present | +| `cf prd stress-test` | `core.prd_stress_test` | `stress_test_prd_stream()` | `/api/v2/prd/stress-test` | GET (SSE) | ✅ Present | -**Note:** Both Discovery workflow and PRD CRUD are now complete ✅. +**Note:** Both Discovery workflow and PRD CRUD are now complete ✅. The stress-test SSE endpoint (#561) is present; web UI results rendering (#562) is pending. ### Task Commands diff --git a/docs/PRODUCT_ROADMAP.md b/docs/PRODUCT_ROADMAP.md index 1fd7d1bb..84605328 100644 --- a/docs/PRODUCT_ROADMAP.md +++ b/docs/PRODUCT_ROADMAP.md @@ -147,11 +147,10 @@ Without a settings page, a new user who cannot find the env vars cannot use the ### 4. 
PRD Stress-Test Web UI -**Current state**: The CLI has `cf prd stress-test` for recursive decomposition — it takes the PRD and surfaces ambiguities the agent cannot resolve without human input. This is described in the vision as a core part of the THINK phase. The web UI has no equivalent; users who work exclusively in the browser never see this step. +**Current state**: Phase 5.4 trigger + streaming shipped (#561). The `/prd` page now has a "Stress Test" button (enabled only when a PRD exists) that opens `StressTestModal`. The modal connects via `useStressTestStream` to `GET /api/v2/prd/stress-test` (SSE), which streams `goals_extracted`, `goal_analyzed`, `complete`, and `error` events from `core/prd_stress_test.py`. Results rendering — displaying the decomposition tree, surfacing ambiguities as answerable questions, feeding answers back to refine the PRD — is tracked in #562 and is not yet built. -**What to build**: +**What remains (#562)**: -- A **[Stress Test]** button on the PRD page that triggers the stress-test process - A **results view** showing the decomposition tree with ambiguities surfaced as questions, styled similarly to the existing Discovery transcript - Each ambiguity has an inline answer field — the user's answers are fed back to refine the PRD - On completion: the refined PRD is saved and the user can proceed to task generation @@ -204,7 +203,7 @@ These are items that were considered and excluded because they do not serve the | 5.1 | Settings page (skeleton + agent config + PROOF9/workspace tabs) | ✅ Complete | #554–556 | | 5.2 | Cost analytics | ✅ Complete | #557–558 | | 5.3 | Async notifications | ✅ Complete (browser + in-app center #559, webhook #560) | #559–560 | -| 5.4 | PRD stress-test web UI | ❌ Not started | #561–562 | +| 5.4 | PRD stress-test web UI | ✅ Complete (trigger + streaming #561; results rendering #562 pending) | #561–562 | | 5.5 | GitHub Issues import | ❌ Not started | #563–565 | **Current focus**: Phase 4A — PR status 
tracking + PROOF9 merge gate.