Skip to content

Commit 04025bf

Browse files
Talha Jubair Siam authored and tsensei committed
fix(pipeline): split gate evaluations from revision rounds in cost estimate
Previously, evaluationsCompleted was passed as a single count and the cost formula charged (critic + director) per unit. This overcounted when the gate evaluated but did not revise (score >= 7 on the first try): the estimate charged for a director call that never happened. Now estimateCost takes separate gateEvaluations and revisionRounds params. Cost formula: gateEvaluations * critic_cost + revisionRounds * director_cost.
1 parent 5463f83 commit 04025bf

4 files changed

Lines changed: 36 additions & 19 deletions

File tree

src/cli/cost-estimator.test.ts

Lines changed: 21 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -119,17 +119,28 @@ describe("estimateCost", () => {
119119
expect(fal.videoCost).toBeGreaterThan(gemini.videoCost);
120120
});
121121

122-
it("includes revision cost when revisionRounds > 0", () => {
122+
it("includes revision cost with separate evaluation and revision counts", () => {
123123
const score = makeScore([{ visual_type: "ai_image", script_line: "Test" }]);
124-
const noRevision = estimateCost(score, "gemini", "elevenlabs", undefined, "anthropic", "bundled", 0);
125-
const withRevision = estimateCost(score, "gemini", "elevenlabs", undefined, "anthropic", "bundled", 2);
126-
expect(noRevision.revisionCost).toBe(0);
127-
expect(noRevision.details.revisionRounds).toBe(0);
128-
expect(withRevision.revisionCost).toBeGreaterThan(0);
129-
expect(withRevision.details.revisionRounds).toBe(2);
130-
expect(withRevision.totalCost).toBeGreaterThan(noRevision.totalCost);
131-
// Revision cost should be approximately 2 × (critic + director) calls
132-
expect(withRevision.revisionCost).toBeCloseTo(withRevision.totalCost - noRevision.totalCost, 4);
124+
// No gate activity
125+
const noGate = estimateCost(score, "gemini", "elevenlabs", undefined, "anthropic", "bundled", 0, 0);
126+
expect(noGate.revisionCost).toBe(0);
127+
expect(noGate.details.gateEvaluations).toBe(0);
128+
expect(noGate.details.revisionRounds).toBe(0);
129+
130+
// Gate only (score >= 7 on first try): 1 eval, 0 revisions
131+
const gateOnly = estimateCost(score, "gemini", "elevenlabs", undefined, "anthropic", "bundled", 1, 0);
132+
expect(gateOnly.revisionCost).toBeGreaterThan(0);
133+
expect(gateOnly.details.gateEvaluations).toBe(1);
134+
expect(gateOnly.details.revisionRounds).toBe(0);
135+
136+
// Full revision: 2 evals + 1 revision
137+
const fullRevision = estimateCost(score, "gemini", "elevenlabs", undefined, "anthropic", "bundled", 2, 1);
138+
expect(fullRevision.revisionCost).toBeGreaterThan(gateOnly.revisionCost);
139+
expect(fullRevision.details.gateEvaluations).toBe(2);
140+
expect(fullRevision.details.revisionRounds).toBe(1);
141+
142+
// Gate-only should cost less than a full revision (no director call)
143+
expect(gateOnly.revisionCost).toBeLessThan(fullRevision.revisionCost);
133144
});
134145

135146
it("uses gemini LLM pricing when llmProvider is gemini", () => {

src/cli/cost-estimator.ts

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ export interface CostBreakdown {
1818
totalCost: number;
1919
details: {
2020
llmCalls: number;
21+
gateEvaluations: number;
2122
revisionRounds: number;
2223
ttsCharacters: number;
2324
aiImages: number;
@@ -98,6 +99,7 @@ export function estimateCost(
9899
videoProvider?: VideoProviderKey,
99100
llmProvider: LLMProviderKey = "anthropic",
100101
musicProvider: MusicProviderKey = "bundled",
102+
gateEvaluations = 0,
101103
revisionRounds = 0,
102104
): CostBreakdown {
103105
const aiImageScenes = score.scenes.filter((s) => s.visual_type === "ai_image").length;
@@ -118,10 +120,10 @@ export function estimateCost(
118120
callCost(TOKEN_ESTIMATES.critic) +
119121
aiImages * callCost(TOKEN_ESTIMATES.imagePrompter);
120122

121-
// Revision cost: each evaluation = 1 critic call + 1 director-sized revise call.
122-
// Slightly undercounts because revise prompts include the original score JSON (~1-3K extra tokens),
123-
// but using the director estimate is a reasonable approximation.
124-
const revisionCost = revisionRounds * (callCost(TOKEN_ESTIMATES.critic) + callCost(TOKEN_ESTIMATES.creativeDirector));
123+
// Quality gate cost: critic calls for evaluation + director calls for revision.
124+
// Split because evaluations >= revisions (the gate always evaluates, but only revises if score < 7).
125+
// Slightly undercounts revise calls because the prompt includes the original score JSON (~1-3K extra tokens).
126+
const revisionCost = gateEvaluations * callCost(TOKEN_ESTIMATES.critic) + revisionRounds * callCost(TOKEN_ESTIMATES.creativeDirector);
125127
const ttsPerChar = PRICING.ttsPerChar[ttsProvider];
126128
const ttsCost = ttsCharacters * ttsPerChar;
127129
const perImage = imageProvider === "openai" ? PRICING.openaiPerImage : PRICING.geminiPerImage;
@@ -161,7 +163,7 @@ export function estimateCost(
161163

162164
return {
163165
llmCost, revisionCost, ttsCost, imageCost, videoCost, musicCost, totalCost,
164-
details: { llmCalls, revisionRounds, ttsCharacters, aiImages, aiVideos: aiVideoScenes },
166+
details: { llmCalls, gateEvaluations, revisionRounds, ttsCharacters, aiImages, aiVideos: aiVideoScenes },
165167
perScene,
166168
};
167169
}
@@ -177,7 +179,10 @@ export function formatCostEstimate(
177179
` LLM: $${breakdown.llmCost.toFixed(4)} (${breakdown.details.llmCalls} calls)`,
178180
];
179181
if (breakdown.revisionCost > 0) {
180-
lines.push(` Gate: $${breakdown.revisionCost.toFixed(4)} (${breakdown.details.revisionRounds} quality gate eval${breakdown.details.revisionRounds !== 1 ? "s" : ""})`);
182+
const evals = breakdown.details.gateEvaluations;
183+
const revs = breakdown.details.revisionRounds;
184+
const detail = revs > 0 ? `${evals} eval${evals !== 1 ? "s" : ""}, ${revs} revision${revs !== 1 ? "s" : ""}` : `${evals} eval${evals !== 1 ? "s" : ""}`;
185+
lines.push(` Gate: $${breakdown.revisionCost.toFixed(4)} (${detail})`);
181186
}
182187
lines.push(
183188
` TTS: $${breakdown.ttsCost.toFixed(4)} (${breakdown.details.ttsCharacters} chars)`,

src/pipeline/orchestrator-callbacks.test.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,7 @@ describe("createCliCallbacks", () => {
8282
totalCost: 0.1,
8383
videoCost: 0,
8484
musicCost: 0,
85-
details: { llmCalls: 3, revisionRounds: 0, ttsCharacters: 500, aiImages: 2, aiVideos: 0 },
85+
details: { llmCalls: 3, gateEvaluations: 0, revisionRounds: 0, ttsCharacters: 500, aiImages: 2, aiVideos: 0 },
8686
},
8787
"gemini",
8888
);

src/pipeline/orchestrator.ts

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -510,8 +510,9 @@ function buildPipelineWorkflow(
510510
}
511511

512512
// Cost estimation (uses the final revised score for accurate scene counts)
513-
// Pass evaluationsCompleted so the cost includes the gate evaluation + any revision rounds
514-
const costBreakdown = estimateCost(score, opts.imageProvider, opts.ttsProvider, opts.videoProvider, opts.llm.id, opts.musicProviderKey, evaluationsCompleted);
513+
// Pass evaluations and revisions separately: evaluations count critic calls,
514+
// revisions count director calls (evaluations >= revisions since gate always evaluates)
515+
const costBreakdown = estimateCost(score, opts.imageProvider, opts.ttsProvider, opts.videoProvider, opts.llm.id, opts.musicProviderKey, evaluationsCompleted, revisionRoundsCompleted);
515516
directorResult.costBreakdown = costBreakdown;
516517
log.totalCost = { estimated: costBreakdown.totalCost };
517518

0 commit comments

Comments
 (0)