Skip to content

Commit a3809bc

Browse files
committed
Telemetry and tuning loop
1 parent 92b7c0d commit a3809bc

8 files changed

Lines changed: 187 additions & 4 deletions

File tree

README.md

Lines changed: 66 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@ Users enter a product idea in plain English, and the system coordinates multiple
55

66
Currently, this repository contains the **Core Backend & Orchestration Layer** (v1), which features a clean provider abstraction, strict Zod validation, and real-time Server-Sent Events (SSE) streaming for agent progress.
77

8+
The backend now runs on **real OpenRouter provider calls** with per-agent token optimization (input compression, output caps, and budget guardrails).
9+
810
---
911

1012
## 🚀 Tech Stack & Tools Needed
@@ -92,10 +94,73 @@ apps/
9294
web/ # Frontend Web App (React/Next.js stub)
9395
9496
packages/
95-
agents/ # Core Orchestration, 6 Subagents, and LLM Provider mock
97+
agents/ # Core Orchestration, 6 Subagents, OpenRouter provider, token optimizer
9698
shared/ # Zod Schemas, Constant Enums, and TS Contract Types
9799
ui/ # Reusable UI primitives stub
98100
config/ # ESLint/TSConfig stubs
99101
```
100102

103+
---
104+
105+
## ⚙️ OpenRouter Runtime Configuration
106+
107+
Set these variables in `apps/api/.env`:
108+
109+
```bash
110+
OPENROUTER_API_KEY=your_key_here
111+
OPENROUTER_ENDPOINT=https://openrouter.ai/api/v1/chat/completions
112+
OPENROUTER_APP_NAME=stackforge-api
113+
OPENROUTER_APP_URL=http://localhost:3001
114+
```
115+
116+
---
117+
118+
## 📉 Token Tuning Guide
119+
120+
Per-agent tuning lives in `packages/agents/src/config/agent.configs.ts`.
121+
122+
- `maxInputTokens`: hard input cap used by optimizer compression.
123+
- `maxOutputTokens`: maximum completion tokens requested from provider.
124+
- `minOutputTokens`: minimum output budget required after compression.
125+
- `tokenBudget`: total budget target used to derive dynamic output caps.
126+
- `compressionLevel`: default compression aggressiveness (`low` / `medium` / `high`).
127+
- `budgetOverflowRetries`: number of extra compression passes before fail-fast.
128+
129+
**Suggested workflow:**
130+
1. Run 3–5 representative prompts.
131+
2. Inspect per-agent SSE `agent_completed` telemetry.
132+
3. Lower `maxInputTokens` or raise `compressionLevel` for agents with high `inputTokens`.
133+
4. Lower `maxOutputTokens` for agents with consistently low `outputTokens`.
134+
5. Raise `minOutputTokens` only if quality drops from over-compression.
135+
136+
---
137+
138+
## 📡 SSE Agent Telemetry
139+
140+
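Each `agent_completed` event includes token and optimizer metrics. `totalTokens` carries the same value as the existing `tokensUsed` field; when the provider does not report usage, `inputTokens` falls back to the optimizer's estimated input tokens and `outputTokens` falls back to the optimizer's output cap: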
141+
142+
```json
143+
{
144+
"type": "agent_completed",
145+
"agent": "schema",
146+
"payload": {
147+
"durationMs": 842,
148+
"cached": false,
149+
"inputTokens": 612,
150+
"outputTokens": 431,
151+
"totalTokens": 1043,
152+
"tokensUsed": 1043,
153+
"estimatedInputTokens": 590,
154+
"compressionPasses": 2,
155+
"providerInputTokens": 612,
156+
"providerOutputTokens": 431,
157+
"model": "openai/gpt-4o-mini"
158+
}
159+
}
160+
```
161+
162+
Per-job aggregates are available in REST responses:
163+
- `GET /api/jobs` returns all jobs with `tokenUsage` summaries.
164+
- `GET /api/jobs/:jobId` includes the same `tokenUsage` object for a single run.
165+
101166

apps/api/src/controllers/jobs.controller.ts

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,24 @@
11
import type { Request, Response, NextFunction } from "express";
22
import { JobIdParamSchema, JOB_STATUS } from "@stackforge/shared";
3-
import { getJob } from "../store/job.store.js";
3+
import { getJob, listJobs, summarizeJobTokenUsage } from "../store/job.store.js";
44
import { subscribe, unsubscribe } from "../services/sse.service.js";
55

6+
/**
 * Handles the job-list route: responds with a JSON body of the form `{ jobs }`.
 *
 * Every job returned by `listJobs()` is projected to a summary object (id,
 * status, project name, timestamps, completed agents, error) and extended with
 * a `tokenUsage` aggregate computed by `summarizeJobTokenUsage(job)`.
 * The projection does not copy `blueprint` or `events` from the stored job.
 *
 * @param _req - Unused; the handler reads nothing from the request.
 * @param res - Express response that receives the JSON payload.
 */
export function listJobsController(_req: Request, res: Response): void {
7+
const jobs = listJobs().map((job) => ({
8+
id: job.id,
9+
status: job.status,
10+
projectName: job.projectName,
11+
createdAt: job.createdAt,
12+
updatedAt: job.updatedAt,
13+
completedAt: job.completedAt,
14+
agentsCompleted: job.agentsCompleted,
15+
error: job.error,
16+
// Recomputed from the job's events on every request; nothing is cached here.
tokenUsage: summarizeJobTokenUsage(job),
17+
}));
18+
19+
res.json({ jobs });
20+
}
21+
622
export function getJobController(req: Request, res: Response, next: NextFunction): void {
723
const parsed = JobIdParamSchema.safeParse(req.params);
824
if (!parsed.success) {
@@ -26,6 +42,7 @@ export function getJobController(req: Request, res: Response, next: NextFunction
2642
agentsCompleted: job.agentsCompleted,
2743
error: job.error,
2844
blueprint: job.blueprint,
45+
tokenUsage: summarizeJobTokenUsage(job),
2946
});
3047
}
3148

apps/api/src/routes/index.ts

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,11 @@
11
import { Router, type IRouter } from "express";
22
import { generateController } from "../controllers/generate.controller.js";
3-
import { getJobController, streamController } from "../controllers/jobs.controller.js";
3+
import { listJobsController, getJobController, streamController } from "../controllers/jobs.controller.js";
44

55
// Route table for this router. Paths are relative to the router's mount point.
// NOTE(review): the integration test requests `/api/jobs`, so the mount point is presumably `/api` — confirm.
const router: IRouter = Router();
66

77
router.post("/generate", generateController);
8+
// Collection endpoint: lists every job together with its token-usage summary.
router.get("/jobs", listJobsController);
89
router.get("/jobs/:jobId", getJobController);
910
router.get("/stream/:jobId", streamController);
1011

apps/api/src/store/job.store.ts

Lines changed: 66 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,10 @@
11
import { randomUUID } from "node:crypto";
2-
import type { Blueprint, SSEEvent, AgentName } from "@stackforge/shared";
2+
import type {
3+
Blueprint,
4+
SSEEvent,
5+
AgentName,
6+
AgentCompletedEvent,
7+
} from "@stackforge/shared";
38
import { JOB_STATUS } from "@stackforge/shared";
49

510
export type StoredJob = {
@@ -18,6 +23,62 @@ export type StoredJob = {
1823

1924
const store = new Map<string, StoredJob>();
2025

26+
/**
 * Token-usage aggregate for a single job, as produced by `summarizeJobTokenUsage`.
 *
 * The top-level counters are sums over all `agent_completed` events of the job;
 * `byAgent` holds the same sums split per agent.
 */
export type JobTokenUsage = {
27+
// Sum of `payload.totalTokens` over the counted events.
totalTokens: number;
28+
// Sum of `payload.inputTokens` over the counted events.
inputTokens: number;
29+
// Sum of `payload.outputTokens` over the counted events.
outputTokens: number;
30+
// Number of `agent_completed` events that contributed to the sums.
completionEvents: number;
31+
// Partial: an agent only has an entry once at least one of its events was counted.
byAgent: Partial<Record<AgentName, {
32+
totalTokens: number;
33+
inputTokens: number;
34+
outputTokens: number;
35+
// Number of `agent_completed` events seen for this agent.
count: number;
36+
}>>;
37+
};
38+
39+
/**
 * Type guard that narrows an `SSEEvent` to `AgentCompletedEvent`.
 *
 * @param event - Any stored SSE event.
 * @returns `true` when the event's `type` is exactly `"agent_completed"`.
 */
function isAgentCompletedEvent(event: SSEEvent): event is AgentCompletedEvent {
40+
return event.type === "agent_completed";
41+
}
42+
43+
/**
 * Builds a token-usage summary for a job from its recorded events.
 *
 * Walks `job.events`, ignores everything that is not an `agent_completed`
 * event, and adds each remaining event's `payload.totalTokens`,
 * `payload.inputTokens` and `payload.outputTokens` to both the job-wide totals
 * and the bucket of the agent named in `event.agent`.
 *
 * @param job - Stored job whose `events` are scanned; the job is not modified.
 * @returns A freshly built summary. With no `agent_completed` events all
 *   counters are 0 and `byAgent` is an empty object.
 */
export function summarizeJobTokenUsage(job: StoredJob): JobTokenUsage {
44+
// Accumulator; despite the name it is mutated in the loop and returned as the result.
const initial: JobTokenUsage = {
45+
totalTokens: 0,
46+
inputTokens: 0,
47+
outputTokens: 0,
48+
completionEvents: 0,
49+
byAgent: {},
50+
};
51+
52+
for (const event of job.events) {
53+
if (!isAgentCompletedEvent(event)) {
54+
continue;
55+
}
56+
57+
// NOTE(review): cached agent runs appear to report zero token counts, so they
// would raise the event counts without changing the sums — confirm.
initial.totalTokens += event.payload.totalTokens;
58+
initial.inputTokens += event.payload.inputTokens;
59+
initial.outputTokens += event.payload.outputTokens;
60+
initial.completionEvents += 1;
61+
62+
// NOTE(review): the cast suggests `event.agent` is typed wider than AgentName — confirm against the SSE schema.
const agent = event.agent as AgentName;
63+
64+
// First event for this agent starts from a zeroed bucket.
const existing = initial.byAgent[agent] ?? {
65+
totalTokens: 0,
66+
inputTokens: 0,
67+
outputTokens: 0,
68+
count: 0,
69+
};
70+
71+
initial.byAgent[agent] = {
72+
totalTokens: existing.totalTokens + event.payload.totalTokens,
73+
inputTokens: existing.inputTokens + event.payload.inputTokens,
74+
outputTokens: existing.outputTokens + event.payload.outputTokens,
75+
count: existing.count + 1,
76+
};
77+
}
78+
79+
return initial;
80+
}
81+
2182
export function createJob(prompt: string, projectName: string): StoredJob {
2283
const now = new Date().toISOString();
2384
const job: StoredJob = {
@@ -38,6 +99,10 @@ export function getJob(id: string): StoredJob | undefined {
3899
return store.get(id);
39100
}
40101

102+
/**
 * Returns every job currently in the store, ordered by `createdAt` descending.
 *
 * The store's values are copied into a new array before sorting, so the
 * returned array can be reordered by callers without affecting the store; the
 * job objects themselves are the stored instances, not copies.
 *
 * @returns All stored jobs, highest `createdAt` string first.
 */
export function listJobs(): StoredJob[] {
103+
// String comparison of `createdAt`. NOTE(review): presumably ISO-8601 timestamps
// (createJob derives `now` from `toISOString()`), which would make this newest-first — confirm.
return [...store.values()].sort((a, b) => b.createdAt.localeCompare(a.createdAt));
104+
}
105+
41106
export function updateJob(
42107
id: string,
43108
patch: Partial<Omit<StoredJob, "id" | "createdAt" | "events">>,

apps/api/test/integration.test.ts

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,5 +52,16 @@ describe("StackForge API Integration", () => {
5252
const jobData = await jobRes.json();
5353
expect(jobData.id).toBe(data.jobId);
5454
expect(["queued", "running", "completed", "failed"]).toContain(jobData.status);
55+
expect(jobData.tokenUsage).toBeDefined();
56+
expect(typeof jobData.tokenUsage.totalTokens).toBe("number");
57+
58+
const jobsRes = await fetch(`${baseUrl}/api/jobs`);
59+
expect(jobsRes.status).toBe(200);
60+
const jobsData = await jobsRes.json();
61+
expect(Array.isArray(jobsData.jobs)).toBe(true);
62+
const createdJob = jobsData.jobs.find((job: { id: string }) => job.id === data.jobId);
63+
expect(createdJob).toBeDefined();
64+
expect(createdJob.tokenUsage).toBeDefined();
65+
expect(typeof createdJob.tokenUsage.totalTokens).toBe("number");
5566
});
5667
});

packages/agents/src/agents/base.agent.ts

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,9 @@ export type AgentRunResult<T> = {
1010
cached: boolean;
1111
durationMs: number;
1212
tokensUsed: number;
13+
inputTokens: number;
14+
outputTokens: number;
15+
totalTokens: number;
1316
estimatedInputTokens: number;
1417
compressionPasses: number;
1518
providerInputTokens: number;
@@ -33,6 +36,9 @@ export async function runAgent<TInput, TOutput>(
3336
cached: true,
3437
durationMs: 0,
3538
tokensUsed: 0,
39+
inputTokens: 0,
40+
outputTokens: 0,
41+
totalTokens: 0,
3642
estimatedInputTokens: 0,
3743
compressionPasses: 0,
3844
providerInputTokens: 0,
@@ -67,6 +73,9 @@ export async function runAgent<TInput, TOutput>(
6773
cached: false,
6874
durationMs,
6975
tokensUsed: response.tokensUsed,
76+
inputTokens: response.inputTokens ?? optimized.estimatedInputTokens,
77+
outputTokens: response.outputTokens ?? optimized.maxOutputTokens,
78+
totalTokens: response.tokensUsed,
7079
estimatedInputTokens: optimized.estimatedInputTokens,
7180
compressionPasses: optimized.compressionPasses,
7281
providerInputTokens: response.inputTokens ?? optimized.estimatedInputTokens,

packages/agents/src/orchestrator/orchestrator.service.ts

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,9 @@ function agentCompleted(
3535
agent: AgentName,
3636
durationMs: number,
3737
cached: boolean,
38+
inputTokens: number,
39+
outputTokens: number,
40+
totalTokens: number,
3841
tokensUsed: number,
3942
estimatedInputTokens: number,
4043
compressionPasses: number,
@@ -50,6 +53,9 @@ function agentCompleted(
5053
payload: {
5154
durationMs,
5255
cached,
56+
inputTokens,
57+
outputTokens,
58+
totalTokens,
5359
tokensUsed,
5460
estimatedInputTokens,
5561
compressionPasses,
@@ -72,6 +78,9 @@ async function runWithEmit<T>(
7278
output: T;
7379
cached: boolean;
7480
durationMs: number;
81+
inputTokens: number;
82+
outputTokens: number;
83+
totalTokens: number;
7584
tokensUsed: number;
7685
estimatedInputTokens: number;
7786
compressionPasses: number;
@@ -89,6 +98,9 @@ async function runWithEmit<T>(
8998
agentName,
9099
result.durationMs,
91100
result.cached,
101+
result.inputTokens,
102+
result.outputTokens,
103+
result.totalTokens,
92104
result.tokensUsed,
93105
result.estimatedInputTokens,
94106
result.compressionPasses,

packages/shared/src/schemas/sse.schema.ts

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,9 @@ export const AgentCompletedEventSchema = SSEBaseSchema.extend({
2626
payload: z.object({
2727
durationMs: z.number(),
2828
cached: z.boolean(),
29+
inputTokens: z.number(),
30+
outputTokens: z.number(),
31+
totalTokens: z.number(),
2932
tokensUsed: z.number(),
3033
estimatedInputTokens: z.number(),
3134
compressionPasses: z.number(),

0 commit comments

Comments
 (0)