Skip to content

Commit f254036

Browse files
authored
Token page optimize (#206)
* add span status distribution Signed-off-by: kerthcet <kerthcet@gmail.com> * Add span status distribution page Signed-off-by: kerthcet <kerthcet@gmail.com> * fix lint Signed-off-by: kerthcet <kerthcet@gmail.com> --------- Signed-off-by: kerthcet <kerthcet@gmail.com>
1 parent 165c2ab commit f254036

20 files changed

Lines changed: 1061 additions & 816 deletions

File tree

VERSION

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
v0.1.1
1+
v0.2.0

alphatrion/server/graphql/resolvers.py

Lines changed: 22 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -267,8 +267,6 @@ def total_runs(team_id: strawberry.ID) -> int:
267267

268268
@staticmethod
269269
def aggregate_team_tokens(team_id: strawberry.ID) -> dict[str, int]:
270-
from alphatrion import envs
271-
272270
if os.getenv(envs.ENABLE_TRACING, "false").lower() != "true":
273271
return {"total_tokens": 0, "input_tokens": 0, "output_tokens": 0}
274272

@@ -283,8 +281,6 @@ def aggregate_team_tokens(team_id: strawberry.ID) -> dict[str, int]:
283281
def aggregate_model_distributions(
284282
team_id: strawberry.ID,
285283
) -> list[ModelDistribution]:
286-
from alphatrion import envs
287-
288284
if os.getenv(envs.ENABLE_TRACING, "false").lower() != "true":
289285
return []
290286

@@ -456,7 +452,6 @@ async def get_artifact_content(
456452
@staticmethod
457453
def aggregate_run_tokens(run_id: strawberry.ID) -> dict[str, int]:
458454
"""Aggregate token usage from all traces for a run."""
459-
from alphatrion import envs
460455

461456
if os.getenv(envs.ENABLE_TRACING, "false").lower() != "true":
462457
return {"total_tokens": 0, "input_tokens": 0, "output_tokens": 0}
@@ -578,7 +573,6 @@ def get_experiment_usage(experiment_id: strawberry.ID):
578573
@staticmethod
579574
def list_spans(run_id: strawberry.ID) -> list[Span]:
580575
"""List all spans for a specific run."""
581-
from alphatrion import envs
582576

583577
# Check if tracing is enabled
584578
if os.getenv(envs.ENABLE_TRACING, "false").lower() != "true":
@@ -661,7 +655,6 @@ def get_daily_token_usage(
661655
team_id: strawberry.ID, days: int = 7
662656
) -> list[DailyTokenUsage]:
663657
"""Get daily token usage from LLM calls for a team."""
664-
from alphatrion import envs
665658

666659
# Check if tracing is enabled
667660
if os.getenv(envs.ENABLE_TRACING, "false").lower() != "true":
@@ -689,6 +682,28 @@ def get_daily_token_usage(
689682
print(f"Failed to fetch daily token usage: {e}")
690683
return []
691684

685+
@staticmethod
def get_experiment_trace_stats(experiment_id: strawberry.ID) -> dict[str, int]:
    """Return span counts for one experiment, or all zeros when unavailable.

    The result always carries the keys ``total_spans``, ``success_spans`` and
    ``error_spans``. Zeros are returned when the ``ENABLE_TRACING``
    environment setting is not ``"true"`` (case-insensitive) or when the
    trace store lookup raises, so the GraphQL query itself never fails here.
    """
    tracing_flag = os.getenv(envs.ENABLE_TRACING, "false")
    if tracing_flag.lower() != "true":
        return {"total_spans": 0, "success_spans": 0, "error_spans": 0}

    try:
        # The trace store connection is a shared singleton, so it is
        # intentionally left open after the lookup.
        return runtime.storage_runtime().tracestore.get_trace_stats_by_exp_id(
            exp_id=experiment_id
        )
    except Exception as err:
        # Best effort: report the failure and fall back to zero counts.
        import logging

        logging.error(
            f"Failed to get trace stats for experiment {experiment_id}: {err}"
        )
        return {"total_spans": 0, "success_spans": 0, "error_spans": 0}
706+
692707
@staticmethod
693708
def list_datasets(
694709
team_id: strawberry.ID,

alphatrion/server/graphql/types.py

Lines changed: 26 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,21 @@ class ModelDistribution:
1919
count: int
2020

2121

22+
# GraphQL type holding the token totals for a single day; the resolver
# get_daily_token_usage returns a list of these for a team.
# NOTE(review): `date` is a plain string — its exact format is not visible
# in this file; confirm against the trace store query.
@strawberry.type
23+
class DailyTokenUsage:
24+
date: str
25+
total_tokens: int
26+
input_tokens: int
27+
output_tokens: int
28+
29+
30+
# GraphQL type carrying span counts for an experiment.
# The trace store fills these from otel_spans: total_spans is COUNT(*),
# success_spans counts StatusCode 'OK' or 'UNSET', and error_spans counts
# StatusCode 'ERROR'.
@strawberry.type
31+
class TraceStats:
32+
total_spans: int
33+
success_spans: int
34+
error_spans: int
35+
36+
2237
@strawberry.type
2338
class Team:
2439
id: strawberry.ID
@@ -152,6 +167,17 @@ def aggregated_tokens(self) -> TokenStats:
152167
output_tokens=tokens["output_tokens"],
153168
)
154169

170+
@strawberry.field
def trace_stats(self) -> TraceStats:
    """Resolve the span status counts for the object identified by ``self.id``."""
    # Imported at call time rather than module level
    # (presumably to avoid a circular import with the resolvers module).
    from .resolvers import GraphQLResolvers

    counts = GraphQLResolvers.get_experiment_trace_stats(experiment_id=self.id)
    total, succeeded, failed = (
        counts[key] for key in ("total_spans", "success_spans", "error_spans")
    )
    return TraceStats(
        total_spans=total,
        success_spans=succeeded,
        error_spans=failed,
    )
180+
155181

156182
@strawberry.type
157183
class Run:
@@ -316,11 +342,3 @@ class Span:
316342
resource_attributes: JSON
317343
events: list[TraceEvent]
318344
links: list[TraceLink]
319-
320-
321-
@strawberry.type
322-
class DailyTokenUsage:
323-
date: str
324-
total_tokens: int
325-
input_tokens: int
326-
output_tokens: int

alphatrion/storage/tracestore.py

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -448,6 +448,40 @@ def get_daily_token_usage(
448448
logger.error(f"Failed to get daily token usage: {e}")
449449
return []
450450

451+
def get_trace_stats_by_exp_id(self, exp_id: uuid.UUID) -> dict[str, int]:
    """Get trace statistics (success/error counts) for a specific experiment_id.

    Args:
        exp_id: The experiment ID to filter by. Any value whose string form
            parses as a UUID is accepted (a ``uuid.UUID`` or its text form).

    Returns:
        Dict with keys: total_spans, success_spans, error_spans. All three
        are 0 when ``exp_id`` is not a valid UUID, when the query yields no
        rows, or when the query fails (failures are logged, not raised).
    """
    # The ID is interpolated into SQL below and may arrive as a raw string
    # from an API caller. Round-tripping through uuid.UUID guarantees that
    # only canonical hex-and-dash text ever reaches the query.
    try:
        safe_exp_id = uuid.UUID(str(exp_id))
    except ValueError:
        logger.error(
            f"Failed to get trace stats by exp_id: invalid experiment id {exp_id!r}"
        )
        return {"total_spans": 0, "success_spans": 0, "error_spans": 0}

    with self._lock:
        try:
            query = f"""
                SELECT
                    COUNT(*) as total_spans,
                    countIf(StatusCode = 'OK' OR StatusCode = 'UNSET')
                        as success_spans,
                    countIf(StatusCode = 'ERROR') as error_spans
                FROM {self.database}.otel_spans
                WHERE ExperimentId = '{safe_exp_id}'
            """

            result = self.client.query(query)
            rows = list(result.named_results())
            if rows:
                row = rows[0]
                return {
                    "total_spans": int(row["total_spans"]),
                    "success_spans": int(row["success_spans"]),
                    "error_spans": int(row["error_spans"]),
                }
            return {"total_spans": 0, "success_spans": 0, "error_spans": 0}
        except Exception as e:
            logger.error(f"Failed to get trace stats by exp_id: {e}")
            return {"total_spans": 0, "success_spans": 0, "error_spans": 0}
484+
451485
def close(self) -> None:
452486
"""Close the ClickHouse connection."""
453487
try:
Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
import { useMemo } from 'react';
2+
import { PieChart, Pie, Cell, ResponsiveContainer, Legend, Tooltip } from 'recharts';
3+
import type { Run } from '../../types';
4+
5+
interface RunStatusChartProps {
  runs: Run[];
}

// Statuses the chart can display, in legend order, with their slice colours.
const RUN_STATUS_SLICES = [
  { name: 'COMPLETED', color: '#22c55e' },
  { name: 'RUNNING', color: '#3b82f6' },
  { name: 'FAILED', color: '#ef4444' },
  { name: 'PENDING', color: '#eab308' },
  { name: 'CANCELLED', color: '#6b7280' },
  { name: 'UNKNOWN', color: '#a78bfa' },
];

/**
 * Pie chart of how many runs are in each status.
 *
 * Statuses with no runs are left out of the chart. Each slice label is that
 * status' share of all runs passed in. When `runs` is missing or empty, a
 * "No run data available" placeholder is rendered instead of the chart.
 */
export function RunStatusChart({ runs }: RunStatusChartProps) {
  const chartData = useMemo(() => {
    // Tally every status in one pass instead of filtering once per status.
    const counts = new Map<string, number>();
    for (const run of runs ?? []) {
      counts.set(run.status, (counts.get(run.status) ?? 0) + 1);
    }

    return RUN_STATUS_SLICES
      .map(({ name, color }) => ({ name, color, value: counts.get(name) ?? 0 }))
      .filter((slice) => slice.value > 0);
  }, [runs]);

  // Denominator for the percentage labels. Guarded the same way as the memo
  // above so a missing `runs` shows the placeholder instead of throwing.
  const total = runs?.length ?? 0;

  if (total === 0 || chartData.length === 0) {
    return (
      <div className="flex h-48 items-center justify-center text-xs text-muted-foreground">
        No run data available
      </div>
    );
  }

  return (
    <ResponsiveContainer width="100%" height={200}>
      <PieChart>
        <Pie
          data={chartData}
          dataKey="value"
          nameKey="name"
          cx="50%"
          cy="50%"
          outerRadius={70}
          labelLine={false}
          label={(entry) => `${((entry.value / total) * 100).toFixed(1)}%`}
          style={{ fontSize: '10px' }}
        >
          {chartData.map((entry, index) => (
            <Cell key={`cell-${index}`} fill={entry.color} />
          ))}
        </Pie>
        <Tooltip
          formatter={(value: number) => [value, 'Count']}
          contentStyle={{
            fontSize: '10px',
            backgroundColor: 'hsl(var(--card))',
            border: '1px solid hsl(var(--border))',
            borderRadius: '6px',
          }}
        />
        <Legend wrapperStyle={{ fontSize: '10px' }} />
      </PieChart>
    </ResponsiveContainer>
  );
}
Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
import { useMemo } from 'react';
2+
import { PieChart, Pie, Cell, ResponsiveContainer, Legend, Tooltip } from 'recharts';
3+
import type { TraceStats } from '../../types';
4+
5+
interface TraceErrorRateChartProps {
  traceStats: TraceStats;
}

/**
 * Pie chart splitting an experiment's spans into "Success" and "Error".
 *
 * A slice is only drawn when its count is above zero. Labels show each count
 * as a percentage of `traceStats.totalSpans`. If there are no spans at all, or
 * neither slice has a positive count, a "No trace data available" placeholder
 * is rendered instead.
 */
export function TraceErrorRateChart({ traceStats }: TraceErrorRateChartProps) {
  const chartData = useMemo(
    () =>
      [
        { name: 'Success', value: traceStats.successSpans, color: '#22c55e' },
        { name: 'Error', value: traceStats.errorSpans, color: '#ef4444' },
      ].filter((slice) => slice.value > 0),
    [traceStats],
  );

  // Denominator for the percentage labels.
  const total = traceStats.totalSpans;
  const hasData = total !== 0 && chartData.length !== 0;

  if (!hasData) {
    return (
      <div className="flex h-48 items-center justify-center text-xs text-muted-foreground">
        No trace data available
      </div>
    );
  }

  return (
    <ResponsiveContainer width="100%" height={200}>
      <PieChart>
        <Pie
          data={chartData}
          dataKey="value"
          nameKey="name"
          cx="50%"
          cy="50%"
          outerRadius={70}
          labelLine={false}
          label={(slice) => `${((slice.value / total) * 100).toFixed(1)}%`}
          style={{ fontSize: '10px' }}
        >
          {chartData.map((slice, position) => (
            <Cell key={`cell-${position}`} fill={slice.color} />
          ))}
        </Pie>
        <Tooltip
          formatter={(value: number) => [value, 'Spans']}
          contentStyle={{
            fontSize: '10px',
            backgroundColor: 'hsl(var(--card))',
            border: '1px solid hsl(var(--border))',
            borderRadius: '6px',
          }}
        />
        <Legend wrapperStyle={{ fontSize: '10px' }} />
      </PieChart>
    </ResponsiveContainer>
  );
}

dashboard/src/lib/format.ts

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,3 +8,38 @@ export function truncateId(id: string, prefixLen: number = 4, suffixLen: number
88
}
99
return `${id.slice(0, prefixLen)}....${id.slice(-suffixLen)}`;
1010
}
11+
12+
/**
 * Format a duration in seconds as a short human-readable string.
 *
 * Values below one second are shown in whole milliseconds. Anything else is
 * split into hours, minutes and whole seconds (fractions are dropped), and
 * parts that are zero are omitted. Negative durations are formatted by their
 * magnitude with a leading "-". Non-finite input (NaN, Infinity) yields "0s".
 *
 * Example:
 *   0.5  -> "500ms"
 *   65   -> "1m 5s"
 *   3665 -> "1h 1m 5s"
 *   -65  -> "-1m 5s"
 */
export function formatDuration(seconds: number): string {
  if (!Number.isFinite(seconds) || seconds === 0) {
    return '0s';
  }

  if (seconds < 0) {
    const magnitude = formatDuration(-seconds);
    // Avoid "-0ms" for tiny negatives that round to nothing.
    return magnitude === '0ms' || magnitude === '0s' ? magnitude : `-${magnitude}`;
  }

  if (seconds < 1) {
    const millis = Math.round(seconds * 1000);
    // e.g. 0.9996 rounds up to 1000ms, which reads better as a full second.
    return millis >= 1000 ? '1s' : `${millis}ms`;
  }

  const hours = Math.floor(seconds / 3600);
  const minutes = Math.floor((seconds % 3600) / 60);
  const secs = Math.floor(seconds % 60);

  const parts: string[] = [];

  if (hours > 0) {
    parts.push(`${hours}h`);
  }
  if (minutes > 0) {
    parts.push(`${minutes}m`);
  }
  // Always emit something, even if every component is zero.
  if (secs > 0 || parts.length === 0) {
    parts.push(`${secs}s`);
  }

  return parts.join(' ');
}

dashboard/src/lib/graphql-client.ts

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -194,6 +194,11 @@ export const queries = {
194194
inputTokens
195195
outputTokens
196196
}
197+
traceStats {
198+
totalSpans
199+
successSpans
200+
errorSpans
201+
}
197202
metrics {
198203
id
199204
key

0 commit comments

Comments
 (0)