|
5 | 5 | type SavingsLiveComparisonCost, |
6 | 6 | type SavingsReferenceRow, |
7 | 7 | type SavingsReferenceTotals, |
| 8 | + classifyDrift, |
| 9 | + type DriftReport, |
8 | 10 | savingsLiveComparison, |
9 | 11 | savingsLiveComparisonCost, |
10 | 12 | savingsReferenceTotals, |
@@ -40,6 +42,16 @@ interface GainOptions { |
40 | 42 | topOps?: string; |
41 | 43 | } |
42 | 44 |
|
/**
 * Raw CLI options for `colony gain drift`.
 *
 * Numeric options arrive as unparsed strings; the action handler parses them
 * and falls back to the documented default when a value is absent or invalid.
 */
interface GainDriftOptions {
  /** `--baseline-days <n>`: baseline window length in days (default 14). */
  baselineDays?: string;
  /** `--recent-days <n>`: recent window length in days (default 3). */
  recentDays?: string;
  /** `--min-calls <n>`: minimum sample size per window (default 20). */
  minCalls?: string;
  /** `--threshold <ratio>`: up-drift trigger ratio (default 1.25). */
  threshold?: string;
  /** `--down-threshold <ratio>`: down-drift trigger ratio (default 0.75). */
  downThreshold?: string;
  /** `--operation <name>`: restrict the report to a single operation. */
  operation?: string;
  /** `--json`: emit structured JSON instead of the plaintext table. */
  json?: boolean;
}
| 54 | + |
43 | 55 | export interface MoverRow { |
44 | 56 | operation: string; |
45 | 57 | recent_calls: number; |
@@ -78,7 +90,7 @@ interface TopErrorReason { |
78 | 90 | } |
79 | 91 |
|
80 | 92 | export function registerGainCommand(program: Command): void { |
81 | | - program |
| 93 | + const gain = program |
82 | 94 | .command('gain') |
83 | 95 | .description('Show colony token/cost savings from live mcp_metrics receipts') |
84 | 96 | .option('--json', 'emit structured JSON') |
@@ -250,6 +262,91 @@ export function registerGainCommand(program: Command): void { |
250 | 262 | movers, |
251 | 263 | ); |
252 | 264 | }); |
| 265 | + |
// `colony gain drift`: compare each operation's median tokens-per-call in a
// recent window against an older baseline window and report the drift.
gain
  .command('drift')
  .description(
    'Flag tools whose median tokens-per-call has drifted vs a baseline window (no schema change)',
  )
  .option('--baseline-days <n>', 'baseline window length in days (default 14)')
  .option('--recent-days <n>', 'recent window length in days (default 3)')
  .option('--min-calls <n>', 'minimum sample size per window to trust signal (default 20)')
  .option('--threshold <ratio>', 'up-drift trigger ratio (default 1.25 = +25%)')
  .option('--down-threshold <ratio>', 'down-drift trigger ratio (default 0.75 = -25%)')
  .option('--operation <name>', 'show only this operation row in the table')
  .option('--json', 'emit structured JSON')
  .action(async (opts: GainDriftOptions) => {
    const DAY_MS = 24 * 60 * 60_000;
    // Gap between the recent and baseline windows so day-of-week noise does
    // not bleed across — see spec/v0.x roadmap.
    const GAP_DAYS = 3;

    const settings = loadSettings();
    // Absent or invalid option values fall back to the documented defaults.
    const baselineDays = parsePositiveFloat(opts.baselineDays) ?? 14;
    const recentDays = parsePositiveFloat(opts.recentDays) ?? 3;
    const minCalls = parsePositiveInt(opts.minCalls) ?? 20;
    const threshold = parsePositiveFloat(opts.threshold) ?? 1.25;
    const downThreshold = parsePositiveFloat(opts.downThreshold) ?? 0.75;

    // Timeline (oldest to newest):
    //   baselineSince .. baselineUntil | gap | recentSince .. recentUntil (= now)
    const now = Date.now();
    const recentUntil = now;
    const recentSince = now - recentDays * DAY_MS;
    const baselineUntil = recentSince - GAP_DAYS * DAY_MS;
    const baselineSince = baselineUntil - baselineDays * DAY_MS;

    const { rawRows, minTs } = await withStorage(
      settings,
      (storage) => {
        const allRows = storage.mcpTokenDriftPerOperation({
          baseline_since: baselineSince,
          baseline_until: baselineUntil,
          recent_since: recentSince,
          recent_until: recentUntil,
        });
        // --operation narrows the rows before classification, so every
        // section of the report only mentions that operation.
        const filtered =
          opts.operation !== undefined
            ? allRows.filter((row) => row.operation === opts.operation)
            : allRows;
        return { rawRows: filtered, minTs: storage.mcpMetricsMinTs() };
      },
      { readonly: true },
    );

    const report = classifyDrift(rawRows, {
      threshold,
      down_threshold: downThreshold,
      min_calls: minCalls,
    });

    // Warn when the earliest recorded metric is newer than the start of the
    // baseline window, i.e. the baseline is only partially covered.
    let baselineWarning: string | null = null;
    if (minTs !== null && minTs > baselineSince) {
      const requiredDays = recentDays + baselineDays + GAP_DAYS;
      const historyDays = (now - minTs) / DAY_MS;
      // Clamp to 1 so floating-point rounding can never print "~0 more days".
      const missingDays = Math.max(1, Math.ceil(requiredDays - historyDays));
      // Pluralize on the number actually printed, not on the window length.
      const plural = missingDays === 1 ? '' : 's';
      baselineWarning = `baseline window starts before first recorded metric — drift detection needs ~${missingDays} more day${plural} of history`;
    }

    if (opts.json === true) {
      const payload = {
        window: {
          baseline_since: baselineSince,
          baseline_until: baselineUntil,
          recent_since: recentSince,
          recent_until: recentUntil,
        },
        threshold: report.threshold,
        rows: report.rows,
        new_tools: report.new_tools,
        gone_tools: report.gone_tools,
        insufficient_data: report.insufficient_data,
        // The warning key is present only when there is something to warn about.
        ...(baselineWarning !== null ? { warning: baselineWarning } : {}),
      };
      process.stdout.write(`${JSON.stringify(payload, null, 2)}\n`);
      return;
    }

    writeDriftReport(report, {
      recentDays,
      baselineDays,
      baselineWarning,
    });
  });
253 | 350 | } |
254 | 351 |
|
255 | 352 | export function writeGainReport( |
@@ -1175,6 +1272,12 @@ function parsePositiveInt(raw: string | undefined): number | undefined { |
1175 | 1272 | return Number.isFinite(parsed) && parsed > 0 ? Math.floor(parsed) : undefined; |
1176 | 1273 | } |
1177 | 1274 |
|
/**
 * Parse a CLI option value as a strictly positive, finite number.
 *
 * @param raw - the raw option string, or `undefined` when the flag was omitted
 * @returns the parsed number, or `undefined` when `raw` is missing, blank,
 *   not numeric, non-finite, zero, or negative
 */
function parsePositiveFloat(raw: string | undefined): number | undefined {
  if (raw === undefined) return undefined;
  // Number('') and Number('   ') are 0, so blank input is rejected up front.
  if (raw.trim().length === 0) return undefined;
  const value = Number(raw);
  if (!Number.isFinite(value) || value <= 0) return undefined;
  return value;
}
| 1280 | + |
1178 | 1281 | // rtk-style proportional bar. The row whose value equals `max` gets a full |
1179 | 1282 | // bar; smaller rows scale linearly. Empty when max <= 0. |
1180 | 1283 | export function renderImpactBar(value: number, max: number, width: number): string { |
@@ -1586,3 +1689,132 @@ function colorByEfficiency(pct: number, text: string): string { |
1586 | 1689 | if (pct >= 40) return kleur.yellow().bold(text); |
1587 | 1690 | return kleur.red().bold(text); |
1588 | 1691 | } |
| 1692 | + |
/** Presentation-only inputs for the plaintext drift report. */
export interface DriftReportInput {
  /** Length of the recent window in days, shown in the report title. */
  recentDays: number;
  /** Length of the baseline window in days, shown in the report title. */
  baselineDays: number;
  /** Warning line printed under the header, or `null` to print none. */
  baselineWarning: string | null;
}
| 1698 | + |
/**
 * Write the plaintext `colony gain drift` report to stdout.
 *
 * Layout: a bold title, a dimmed thresholds line, an optional warning, then a
 * table of classified rows (up-drift, down-drift, stable, in that order)
 * followed by one-line summaries of the insufficient-data, new, and gone sets.
 * When there is nothing to tabulate and no new or gone tools, a short notice
 * is printed instead (plus the insufficient-data line when applicable).
 *
 * @param report - classified drift report to render
 * @param input - window lengths for the title and an optional warning line
 */
export function writeDriftReport(report: DriftReport, input: DriftReportInput): void {
  const out = process.stdout;

  const recentLabel = formatDaysLabel(input.recentDays);
  const baselineLabel = formatDaysLabel(input.baselineDays);
  out.write(
    `${kleur.bold(`colony gain drift (recent ${recentLabel} vs baseline ${baselineLabel})`)}\n`,
  );

  const { up, down, min_calls: minCalls } = report.threshold;
  out.write(
    kleur.dim(
      `Thresholds: up >= ${up.toFixed(2)}x, down <= ${down.toFixed(2)}x, min ${minCalls} calls per window.\n`,
    ),
  );

  if (input.baselineWarning !== null) {
    out.write(`${kleur.yellow('[warn] ')}${input.baselineWarning}\n`);
  }

  // Most urgent bucket first. Within a bucket the incoming row order is kept
  // so the output stays deterministic for tests.
  const displayOrder = ['up_drift', 'down_drift', 'stable'] as const;
  const ordered: Array<DriftReport['rows'][number]> = [];
  for (const bucket of displayOrder) {
    for (const row of report.rows) {
      if (row.classification === bucket) ordered.push(row);
    }
  }

  const hasInsufficient = report.insufficient_data.length > 0;
  const hasNew = report.new_tools.length > 0;
  const hasGone = report.gone_tools.length > 0;

  if (ordered.length === 0 && !hasNew && !hasGone) {
    out.write(kleur.dim('No operations had enough samples in both windows.\n'));
    if (hasInsufficient) writeDriftInsufficient(report);
    return;
  }

  if (ordered.length > 0) {
    const widths = [24, 13, 11, 8, 7, 7, 18];
    const operationWidth = widths[0] ?? 24;
    const header = padRow(
      ['Operation', 'Baseline med', 'Recent med', 'Ratio', 'n_base', 'n_rec', 'Class'],
      widths,
    );
    out.write(`${kleur.dim(header)}\n`);
    for (const row of ordered) {
      const line = padRow(
        [
          truncate(row.operation, operationWidth),
          formatTokens(row.baseline_median ?? 0),
          formatTokens(row.recent_median ?? 0),
          formatDriftRatio(row.ratio, row.classification),
          formatInt(row.baseline_n),
          formatInt(row.recent_n),
          formatDriftClass(row.classification),
        ],
        widths,
      );
      out.write(`${line}\n`);
    }
  }

  if (hasInsufficient) {
    writeDriftInsufficient(report);
  }
  if (hasNew) {
    out.write(`${kleur.dim('New tools (no baseline):')} ${report.new_tools.join(', ')}\n`);
  }
  if (hasGone) {
    out.write(`${kleur.dim('Gone tools (no recent calls):')} ${report.gone_tools.join(', ')}\n`);
  }
}
| 1778 | + |
/**
 * Print a one-line summary of operations that lacked enough samples.
 *
 * At most 12 operation names are listed; any remainder is collapsed into a
 * trailing `+N more` marker.
 */
function writeDriftInsufficient(report: DriftReport): void {
  const listedLimit = 12;
  const rows = report.insufficient_data;
  const listed = rows
    .slice(0, listedLimit)
    .map((row) => row.operation)
    .join(', ');
  const overflow = rows.length - listedLimit;
  const suffix = overflow > 0 ? `, +${overflow} more` : '';
  const heading = kleur.dim(`Insufficient data (n<${report.threshold.min_calls}):`);
  process.stdout.write(`${heading} ${listed}${suffix}\n`);
}
| 1791 | + |
/**
 * Format a recent/baseline ratio for the drift table.
 *
 * @param ratio - ratio to display, or `null` when none is available
 * @param classification - row classification; selects arrow and colour
 * @returns `-` for a null ratio; otherwise the ratio with an `x` suffix
 *   (one decimal at 10 or above, two below), prefixed with a red `▲` for
 *   up-drift or a green `▼` for down-drift
 */
function formatDriftRatio(
  ratio: number | null,
  classification: DriftReport['rows'][number]['classification'],
): string {
  if (ratio === null) return '-';
  const digits = ratio >= 10 ? 1 : 2;
  const text = `${ratio.toFixed(digits)}x`;
  switch (classification) {
    case 'up_drift':
      return kleur.red(`▲${text}`);
    case 'down_drift':
      return kleur.green(`▼${text}`);
    default:
      return text;
  }
}
| 1799 | + |
/**
 * Colourize a drift classification label for terminal output.
 *
 * The label is the classification name itself, except `insufficient_data`,
 * which is shortened to `insufficient`. Colours: red for up-drift, green for
 * down-drift, cyan for new tools, dim for everything else.
 */
function formatDriftClass(classification: DriftReport['rows'][number]['classification']): string {
  const label = classification === 'insufficient_data' ? 'insufficient' : classification;
  switch (classification) {
    case 'up_drift':
      return kleur.red(label);
    case 'down_drift':
      return kleur.green(label);
    case 'new_tool':
      return kleur.cyan(label);
    case 'stable':
    case 'gone':
    case 'insufficient_data':
      return kleur.dim(label);
  }
}
| 1816 | + |
/**
 * Format a window length in days as a compact label.
 *
 * @param days - window length in days
 * @returns `"<n>d"` for integer values, otherwise the value with one decimal
 *   place followed by `d`
 */
function formatDaysLabel(days: number): string {
  const text = Number.isInteger(days) ? String(days) : days.toFixed(1);
  return `${text}d`;
}
0 commit comments