Skip to content

Commit 21d2582

Browse files
authored
Merge pull request #13 from braintrustdata/ark/anthropic_time_to_first_token
add time to first token metric for anthropic instrumentation
2 parents f8682b7 + 1732b75 commit 21d2582

3 files changed

Lines changed: 42 additions & 2 deletions

File tree

src/main/java/dev/braintrust/instrumentation/anthropic/otel/InstrumentedMessageService.java

Lines changed: 12 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -79,6 +79,7 @@ private Message create(MessageCreateParams inputMessage, RequestOptions requestO
7979
}
8080

8181
Context context = instrumenter.start(parentContext, inputMessage);
82+
long startTimeNanos = System.nanoTime();
8283
Message outputMessage;
8384
try (Scope ignored = context.makeCurrent()) {
8485
Span currentSpan = Span.current();
@@ -96,6 +97,10 @@ private Message create(MessageCreateParams inputMessage, RequestOptions requestO
9697
}
9798
BraintrustAnthropicSpanAttributes.setInputMessages(currentSpan, inputMessages);
9899
outputMessage = delegate.create(inputMessage, requestOptions);
100+
long endTimeNanos = System.nanoTime();
101+
double timeToFirstTokenSeconds = (endTimeNanos - startTimeNanos) / 1_000_000_000.0;
102+
currentSpan.setAttribute(
103+
"braintrust.metrics.time_to_first_token", timeToFirstTokenSeconds);
99104
BraintrustAnthropicSpanAttributes.setOutputMessage(Span.current(), outputMessage);
100105
} catch (Throwable t) {
101106
instrumenter.end(context, inputMessage, null, t);
@@ -143,12 +148,18 @@ private StreamResponse<RawMessageStreamEvent> createStreamingWithAttributes(
143148
}
144149
BraintrustAnthropicSpanAttributes.setInputMessages(span, inputMessages);
145150

151+
long startTimeNanos = System.nanoTime();
146152
StreamResponse<RawMessageStreamEvent> result =
147153
delegate.createStreaming(inputMessage, requestOptions);
148154
return new TracingStreamedResponse(
149155
result,
150156
new StreamListener(
151-
context, inputMessage, instrumenter, captureMessageContent, newSpan));
157+
context,
158+
inputMessage,
159+
instrumenter,
160+
captureMessageContent,
161+
newSpan,
162+
startTimeNanos));
152163
}
153164

154165
private static String contentToString(MessageCreateParams.System content) {

src/main/java/dev/braintrust/instrumentation/anthropic/otel/StreamListener.java

Lines changed: 16 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -27,6 +27,7 @@ final class StreamListener {
2727
private final boolean captureMessageContent;
2828
private final boolean newSpan;
2929
private final AtomicBoolean hasEnded;
30+
private final long startTimeNanos;
3031

3132
private final StringBuilder contentBuilder = new StringBuilder();
3233

@@ -35,23 +36,32 @@ final class StreamListener {
3536
@Nullable private Model model;
3637
@Nullable private String responseId;
3738
@Nullable private String stopReason;
39+
@Nullable private Double timeToFirstToken;
3840

3941
StreamListener(
4042
Context context,
4143
MessageCreateParams request,
4244
Instrumenter<MessageCreateParams, Message> instrumenter,
4345
boolean captureMessageContent,
44-
boolean newSpan) {
46+
boolean newSpan,
47+
long startTimeNanos) {
4548
this.context = context;
4649
this.request = request;
4750
this.instrumenter = instrumenter;
4851
this.captureMessageContent = captureMessageContent;
4952
this.newSpan = newSpan;
53+
this.startTimeNanos = startTimeNanos;
5054
hasEnded = new AtomicBoolean();
5155
}
5256

5357
@SneakyThrows
5458
void onEvent(RawMessageStreamEvent event) {
59+
// Capture time to first token on the first event
60+
if (timeToFirstToken == null) {
61+
long firstEventTimeNanos = System.nanoTime();
62+
timeToFirstToken = (firstEventTimeNanos - startTimeNanos) / 1_000_000_000.0;
63+
}
64+
5565
// Handle message_start event
5666
if (event.messageStart().isPresent()) {
5767
var messageStart = event.messageStart().get();
@@ -144,6 +154,11 @@ void endSpan(@Nullable Throwable error) {
144154
span.setAttribute("gen_ai.usage.output_tokens", usage.outputTokens());
145155
}
146156

157+
// Set time to first token if captured
158+
if (timeToFirstToken != null) {
159+
span.setAttribute("braintrust.metrics.time_to_first_token", timeToFirstToken);
160+
}
161+
147162
instrumenter.end(context, request, null, error);
148163
}
149164
}

src/test/java/dev/braintrust/instrumentation/anthropic/BraintrustAnthropicTest.java

Lines changed: 14 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -152,6 +152,13 @@ void testWrapAnthropic() {
152152
outputMessage.get("content").get(0).get("text").asText());
153153
assertEquals(8, outputMessage.get("usage").get("output_tokens").asInt());
154154
assertEquals(20, outputMessage.get("usage").get("input_tokens").asInt());
155+
156+
// Verify time to first token
157+
Double timeToFirstToken =
158+
span.getAttributes()
159+
.get(AttributeKey.doubleKey("braintrust.metrics.time_to_first_token"));
160+
assertNotNull(timeToFirstToken, "time_to_first_token should be present");
161+
assertTrue(timeToFirstToken >= 0.0, "time_to_first_token should be non-negative");
155162
}
156163

157164
@Test
@@ -280,5 +287,12 @@ void testWrapAnthropicStreaming() {
280287
var messageZero = outputMessages.get(0);
281288
assertEquals("assistant", messageZero.get("role").asText());
282289
assertEquals("The capital of France is Paris.", messageZero.get("content").asText());
290+
291+
// Verify time to first token
292+
Double timeToFirstToken =
293+
span.getAttributes()
294+
.get(AttributeKey.doubleKey("braintrust.metrics.time_to_first_token"));
295+
assertNotNull(timeToFirstToken, "time_to_first_token should be present for streaming");
296+
assertTrue(timeToFirstToken >= 0.0, "time_to_first_token should be non-negative");
283297
}
284298
}

0 commit comments

Comments
 (0)