-
Notifications
You must be signed in to change notification settings - Fork 334
Expand file tree
/
Copy pathDDLLMObsSpanTest.groovy
More file actions
424 lines (348 loc) · 13.9 KB
/
DDLLMObsSpanTest.groovy
File metadata and controls
424 lines (348 loc) · 13.9 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
package datadog.trace.llmobs.domain
import static datadog.trace.agent.test.utils.TraceUtils.runUnderTrace
import datadog.trace.agent.tooling.TracerInstaller
import datadog.trace.api.DDTags
import datadog.trace.api.IdGenerationStrategy
import datadog.trace.api.WellKnownTags
import datadog.trace.api.telemetry.LLMObsMetricCollector
import datadog.trace.api.llmobs.LLMObs
import datadog.trace.api.llmobs.LLMObsSpan
import datadog.trace.api.llmobs.LLMObsTags
import datadog.trace.bootstrap.instrumentation.api.AgentSpan
import datadog.trace.bootstrap.instrumentation.api.AgentTracer
import datadog.trace.bootstrap.instrumentation.api.Tags
import datadog.trace.bootstrap.instrumentation.api.UTF8BytesString
import datadog.trace.core.CoreTracer
import datadog.trace.test.util.DDSpecification
import org.apache.groovy.util.Maps
import spock.lang.Shared
class DDLLMObsSpanTest extends DDSpecification{
@SuppressWarnings('PropertyName')
@Shared
AgentTracer.TracerAPI TEST_TRACER
// Builds the tracer shared by all features of this spec, wraps it in a Spock spy and
// installs it as the global tracer.
void setupSpec() {
  def coreTracer = CoreTracer.builder()
    .idGenerationStrategy(IdGenerationStrategy.fromName("SEQUENTIAL"))
    .build()
  TEST_TRACER = Spy(coreTracer)
  TracerInstaller.forceInstallGlobalTracer(TEST_TRACER)
  // Span creation is forwarded unchanged to the real tracer implementation.
  // NOTE(review): Spock documents interactions as feature-scoped; confirm this stub
  // declared at spec level is still needed.
  TEST_TRACER.startSpan(*_) >> { callRealMethod() }
}
// Closes the shared tracer after the last feature; does nothing if it was never created.
void cleanupSpec() {
  if (TEST_TRACER != null) {
    TEST_TRACER.close()
  }
}
// Runs before each feature: fails fast if a previous feature leaked an active span,
// then flushes the shared tracer.
void setup() {
  def leakedSpan = TEST_TRACER.activeSpan()
  assert leakedSpan == null: "Span is active before test has started: " + leakedSpan
  TEST_TRACER.flush()
}
// Runs after each feature: flushes the shared tracer.
void cleanup() {
TEST_TRACER.flush()
}
// Key prefix under which the features below look up LLM Obs tags on the underlying span
private static final String LLMOBS_TAG_PREFIX = "_ml_obs_tag."
// Key prefix under which the features below look up LLM Obs metrics on the underlying span
private static final String LLMOBS_METRIC_PREFIX = "_ml_obs_metric."
// Fully prefixed keys for the input, output and metadata tags; the features assert that
// the unprefixed keys ("input", "output", "metadata") are absent from the span
private static final String INPUT = LLMOBS_TAG_PREFIX + "input"
private static final String OUTPUT = LLMOBS_TAG_PREFIX + "output"
private static final String METADATA = LLMOBS_TAG_PREFIX + LLMObsTags.METADATA
// Annotates a workflow span once with IO, metadata, metrics, tags and an error message,
// then checks that each value is readable only under its prefixed key on the inner span.
def "test span simple"() {
  setup:
  def llmSpan = llmObsSpan(Tags.LLMOBS_WORKFLOW_SPAN_KIND, "test-span")

  when:
  def input = "test input"
  def output = "test output"
  def errMsg = "mr brady"
  // every value is written exactly once in this feature
  llmSpan.annotateIO(input, output)
  llmSpan.setMetadata(Maps.of("sport", "baseball", "price_data", Maps.of("gpt4", 100)))
  llmSpan.setMetrics(Maps.of("rank", 1))
  llmSpan.setMetric("likelihood", 0.1)
  llmSpan.setTag("DOMAIN", "north-america")
  llmSpan.setTags(Maps.of("bulk1", 1, "bulk2", "2"))
  llmSpan.setErrorMessage(errMsg)

  then:
  def agentSpan = (AgentSpan) llmSpan.span
  def spanKind = agentSpan.getTag(LLMOBS_TAG_PREFIX + "span.kind")
  Tags.LLMOBS_WORKFLOW_SPAN_KIND.equals(spanKind)

  // input, output and metadata
  agentSpan.getTag("input") == null
  input.equals(agentSpan.getTag(INPUT))
  agentSpan.getTag("output") == null
  output.equals(agentSpan.getTag(OUTPUT))
  agentSpan.getTag("metadata") == null
  def wantedMetadata = Maps.of("sport", "baseball", "price_data", Maps.of("gpt4", 100))
  wantedMetadata.equals(agentSpan.getTag(METADATA))

  // metrics
  agentSpan.getTag("rank") == null
  def rank = agentSpan.getTag(LLMOBS_METRIC_PREFIX + "rank")
  rank instanceof Number
  ((int) rank) == 1
  agentSpan.getTag("likelihood") == null
  def likelihood = agentSpan.getTag(LLMOBS_METRIC_PREFIX + "likelihood")
  likelihood instanceof Number
  ((double) likelihood) == 0.1

  // single and bulk tags
  agentSpan.getTag("DOMAIN") == null
  def domainTag = agentSpan.getTag(LLMOBS_TAG_PREFIX + "DOMAIN")
  domainTag instanceof String
  "north-america".equals(domainTag)
  agentSpan.getTag("bulk1") == null
  def bulk1Tag = agentSpan.getTag(LLMOBS_TAG_PREFIX + "bulk1")
  bulk1Tag instanceof Number
  ((int) bulk1Tag) == 1
  agentSpan.getTag("bulk2") == null
  def bulk2Tag = agentSpan.getTag(LLMOBS_TAG_PREFIX + "bulk2")
  bulk2Tag instanceof String
  "2".equals(bulk2Tag)

  // error state
  agentSpan.isError()
  def errorMsgTag = agentSpan.getTag(DDTags.ERROR_MSG)
  errorMsgTag instanceof String
  errMsg.equals(errorMsgTag)

  // env, service and version supplied by the llmObsSpan helper
  agentSpan.getTag("env") == null
  def envTag = agentSpan.getTag(LLMOBS_TAG_PREFIX + "env")
  envTag instanceof UTF8BytesString
  envTag.toString() == "test-env"
  agentSpan.getTag("service") == null
  def serviceTag = agentSpan.getTag(LLMOBS_TAG_PREFIX + "service")
  serviceTag instanceof UTF8BytesString
  serviceTag.toString() == "test-svc"
  agentSpan.getTag("version") == null
  def versionTag = agentSpan.getTag(LLMOBS_TAG_PREFIX + "version")
  versionTag instanceof UTF8BytesString
  versionTag.toString() == "v1"
}
// Annotates an agent span repeatedly and checks the final state: later values replace
// or extend earlier ones, and an explicit setError(false) wins over a recorded throwable.
def "test span with overwrites"() {
  setup:
  def llmSpan = llmObsSpan(Tags.LLMOBS_AGENT_SPAN_KIND, "test-span")

  when:
  def input = "test input"
  def throwableMsg = "false positive"
  def expectedOutput = Arrays.asList(Maps.of("role", "user", "content", "how much is gas"))

  // IO: initial values, then an empty-string call that should change nothing,
  // then a call that should replace only the output
  llmSpan.annotateIO(input, "test output")
  llmSpan.annotateIO("", "")
  llmSpan.annotateIO(null, expectedOutput)

  // metadata: initial values, then "sport" replaced by hockey, then a new key added
  llmSpan.setMetadata(Maps.of("sport", "baseball", "price_data", Maps.of("gpt4", 100)))
  llmSpan.setMetadata(Maps.of("sport", "hockey"))
  llmSpan.setMetadata(Maps.of("temperature", 30))

  // metrics: initial value, then replaced
  llmSpan.setMetrics(Maps.of("rank", 1))
  llmSpan.setMetric("rank", 10)

  // tags: initial value, then one tag added and DOMAIN replaced
  llmSpan.setTag("DOMAIN", "north-america")
  llmSpan.setTags(Maps.of("bulk1", 1, "DOMAIN", "europe"))

  // error: record a throwable, then clear the error flag again
  llmSpan.addThrowable(new Throwable(throwableMsg))
  llmSpan.setError(false)

  then:
  def agentSpan = (AgentSpan) llmSpan.span
  def spanKind = agentSpan.getTag(LLMOBS_TAG_PREFIX + "span.kind")
  Tags.LLMOBS_AGENT_SPAN_KIND.equals(spanKind)

  // input, output and metadata
  agentSpan.getTag("input") == null
  input.equals(agentSpan.getTag(INPUT))
  agentSpan.getTag("output") == null
  expectedOutput.equals(agentSpan.getTag(OUTPUT))
  agentSpan.getTag("metadata") == null
  def wantedMetadata = Maps.of("sport", "hockey", "price_data", Maps.of("gpt4", 100), "temperature", 30)
  wantedMetadata.equals(agentSpan.getTag(METADATA))

  // metrics
  agentSpan.getTag("rank") == null
  def rank = agentSpan.getTag(LLMOBS_METRIC_PREFIX + "rank")
  rank instanceof Number
  ((int) rank) == 10

  // tags
  agentSpan.getTag("DOMAIN") == null
  def domainTag = agentSpan.getTag(LLMOBS_TAG_PREFIX + "DOMAIN")
  domainTag instanceof String
  "europe".equals(domainTag)
  agentSpan.getTag("bulk1") == null
  def bulk1Tag = agentSpan.getTag(LLMOBS_TAG_PREFIX + "bulk1")
  bulk1Tag instanceof Number
  ((int) bulk1Tag) == 1

  // error flag is cleared, but message and stack of the throwable are still recorded
  !agentSpan.isError()
  def errorMsgTag = agentSpan.getTag(DDTags.ERROR_MSG)
  errorMsgTag instanceof String
  throwableMsg.equals(errorMsgTag)
  def errorStackTag = agentSpan.getTag(DDTags.ERROR_STACK)
  errorStackTag instanceof String
  ((String) errorStackTag).contains(throwableMsg)

  // env, service and version supplied by the llmObsSpan helper
  agentSpan.getTag("env") == null
  def envTag = agentSpan.getTag(LLMOBS_TAG_PREFIX + "env")
  envTag instanceof UTF8BytesString
  envTag.toString() == "test-env"
  agentSpan.getTag("service") == null
  def serviceTag = agentSpan.getTag(LLMOBS_TAG_PREFIX + "service")
  serviceTag instanceof UTF8BytesString
  serviceTag.toString() == "test-svc"
  agentSpan.getTag("version") == null
  def versionTag = agentSpan.getTag(LLMOBS_TAG_PREFIX + "version")
  versionTag instanceof UTF8BytesString
  versionTag.toString() == "v1"
}
// Plain string input/output annotated on an LLM-kind span is expected to be stored as
// single-element message lists (role "unknown", no tool calls) instead of raw strings.
def "test llm span string input formatted to messages"() {
  setup:
  def test = llmObsSpan(Tags.LLMOBS_LLM_SPAN_KIND, "test-span")

  when:
  def input = "test input"
  def output = "test output"
  // initial set
  test.annotateIO(input, output)

  then:
  def innerSpan = (AgentSpan)test.span
  Tags.LLMOBS_LLM_SPAN_KIND.equals(innerSpan.getTag(LLMOBS_TAG_PREFIX + "span.kind"))

  null == innerSpan.getTag("input")
  def spanInput = innerSpan.getTag(INPUT)
  spanInput instanceof List
  ((List)spanInput).size() == 1
  // Assert on the message actually stored on the span; checking a locally built
  // LLMMessage instead would pass regardless of what the span contains.
  def spanInputMsg = spanInput.get(0)
  spanInputMsg instanceof LLMObs.LLMMessage
  spanInputMsg.getContent() == input
  spanInputMsg.getRole() == "unknown"
  spanInputMsg.getToolCalls() == null

  null == innerSpan.getTag("output")
  def spanOutput = innerSpan.getTag(OUTPUT)
  spanOutput instanceof List
  ((List)spanOutput).size() == 1
  def spanOutputMsg = spanOutput.get(0)
  spanOutputMsg instanceof LLMObs.LLMMessage
  spanOutputMsg.getContent() == output
  spanOutputMsg.getRole() == "unknown"
  spanOutputMsg.getToolCalls() == null

  // env, service and version supplied by the llmObsSpan helper
  null == innerSpan.getTag("env")
  def tagEnv = innerSpan.getTag(LLMOBS_TAG_PREFIX + "env")
  tagEnv instanceof UTF8BytesString
  "test-env" == tagEnv.toString()
  null == innerSpan.getTag("service")
  def tagSvc = innerSpan.getTag(LLMOBS_TAG_PREFIX + "service")
  tagSvc instanceof UTF8BytesString
  "test-svc" == tagSvc.toString()
  null == innerSpan.getTag("version")
  def tagVersion = innerSpan.getTag(LLMOBS_TAG_PREFIX + "version")
  tagVersion instanceof UTF8BytesString
  "v1" == tagVersion.toString()
}
// Annotates an LLM span with explicit message lists (the output carrying one tool call)
// and checks that the stored messages expose the same role, content and tool call data.
def "test llm span with messages"() {
  setup:
  def llmSpan = llmObsSpan(Tags.LLMOBS_LLM_SPAN_KIND, "test-span")

  when:
  def userMsg = LLMObs.LLMMessage.from("user", "input")
  def weatherCall = LLMObs.ToolCall.from("weather-tool", "function", "6176241000", Maps.of("location", "paris"))
  def assistantMsg = LLMObs.LLMMessage.from("assistant", "output", Arrays.asList(weatherCall))
  llmSpan.annotateIO(Arrays.asList(userMsg), Arrays.asList(assistantMsg))

  then:
  def agentSpan = (AgentSpan) llmSpan.span
  def spanKind = agentSpan.getTag(LLMOBS_TAG_PREFIX + "span.kind")
  Tags.LLMOBS_LLM_SPAN_KIND.equals(spanKind)

  // input: a single user message without tool calls
  agentSpan.getTag("input") == null
  def storedInput = agentSpan.getTag(INPUT)
  storedInput instanceof List
  ((List) storedInput).size() == 1
  def storedInputMsg = storedInput.get(0)
  storedInputMsg instanceof LLMObs.LLMMessage
  storedInputMsg.getContent().equals(userMsg.getContent())
  storedInputMsg.getRole().equals("user")
  storedInputMsg.getToolCalls() == null

  // output: a single assistant message carrying exactly one tool call
  agentSpan.getTag("output") == null
  def storedOutput = agentSpan.getTag(OUTPUT)
  storedOutput instanceof List
  ((List) storedOutput).size() == 1
  def storedOutputMsg = storedOutput.get(0)
  storedOutputMsg instanceof LLMObs.LLMMessage
  storedOutputMsg.getContent().equals(assistantMsg.getContent())
  storedOutputMsg.getRole().equals("assistant")
  storedOutputMsg.getToolCalls().size() == 1
  def storedCall = storedOutputMsg.getToolCalls().get(0)
  storedCall.getName().equals("weather-tool")
  storedCall.getType().equals("function")
  storedCall.getToolId().equals("6176241000")
  def wantedArgs = Maps.of("location", "paris")
  storedCall.getArguments().equals(wantedArgs)

  // env, service and version supplied by the llmObsSpan helper
  agentSpan.getTag("env") == null
  def envTag = agentSpan.getTag(LLMOBS_TAG_PREFIX + "env")
  envTag instanceof UTF8BytesString
  envTag.toString() == "test-env"
  agentSpan.getTag("service") == null
  def serviceTag = agentSpan.getTag(LLMOBS_TAG_PREFIX + "service")
  serviceTag instanceof UTF8BytesString
  serviceTag.toString() == "test-svc"
  agentSpan.getTag("version") == null
  def versionTag = agentSpan.getTag(LLMOBS_TAG_PREFIX + "version")
  versionTag instanceof UTF8BytesString
  versionTag.toString() == "v1"
}
// Finishing a workflow span with no active parent must leave exactly one
// span.finished count metric in the collector, tagged as a root span.
def "finish records span.finished telemetry when LLMObs enabled"() {
  setup:
  LLMObsMetricCollector metricCollector = LLMObsMetricCollector.get()
  // presumably discards metrics left behind by earlier features - the result is ignored
  metricCollector.drain()

  when:
  def workflowSpan = llmObsSpan(Tags.LLMOBS_WORKFLOW_SPAN_KIND, "workflow-span")
  workflowSpan.finish()

  then:
  def drained = metricCollector.drain()
  drained.size() == 1

  and:
  def finishedMetric = drained[0]
  finishedMetric.namespace == 'mlobs'
  finishedMetric.metricName == 'span.finished'
  finishedMetric.type == 'count'
  finishedMetric.value == 1
  finishedMetric.tags.contains('integration:llmobs')
  finishedMetric.tags.contains('span_kind:workflow')
  finishedMetric.tags.contains('autoinstrumented:0')
  finishedMetric.tags.contains('is_root_span:1')
}
// Finishing an LLM span inside a "parent" trace must leave exactly one
// span.finished count metric in the collector, tagged as a non-root span.
def "finish records span.finished telemetry for non-root span when LLMObs enabled"() {
  setup:
  LLMObsMetricCollector metricCollector = LLMObsMetricCollector.get()
  // presumably discards metrics left behind by earlier features - the result is ignored
  metricCollector.drain()

  when:
  runUnderTrace("parent") {
    def childSpan = llmObsSpan(Tags.LLMOBS_LLM_SPAN_KIND, "child-llm")
    childSpan.finish()
  }

  then:
  def drained = metricCollector.drain()
  drained.size() == 1

  and:
  def finishedMetric = drained[0]
  finishedMetric.namespace == 'mlobs'
  finishedMetric.metricName == 'span.finished'
  finishedMetric.type == 'count'
  finishedMetric.value == 1
  finishedMetric.tags.contains('integration:llmobs')
  finishedMetric.tags.contains('span_kind:llm')
  finishedMetric.tags.contains('autoinstrumented:0')
  finishedMetric.tags.contains('is_root_span:0')
}
// Data-driven: finishes a workflow span created with and without a session id and checks
// the has_session_id tag on the resulting span.finished metric.
// NOTE(review): only the telemetry tag is asserted; the span's own session tag named in
// the feature title is not checked here.
def "span has expected session tag and telemetry has #expectedHasSessionIdTag"() {
  setup:
  LLMObsMetricCollector metricCollector = LLMObsMetricCollector.get()
  // presumably discards metrics left behind by earlier features - the result is ignored
  metricCollector.drain()

  when:
  def workflowSpan = llmObsSpan(Tags.LLMOBS_WORKFLOW_SPAN_KIND, "workflow-span", sessionId)
  workflowSpan.finish()

  then:
  def drained = metricCollector.drain()
  drained.size() == 1

  and:
  def finishedMetric = drained[0]
  finishedMetric.namespace == 'mlobs'
  finishedMetric.metricName == 'span.finished'
  finishedMetric.tags.contains(expectedHasSessionIdTag)

  where:
  sessionId     | expectedHasSessionIdTag
  "session-123" | "has_session_id:1"
  null          | "has_session_id:0"
}
// With trace.global.tags configured, each global tag must be readable on the inner span
// under the LLM Obs tag prefix.
def "global dd_tags are included in LLMObs span tags"() {
  setup:
  injectSysConfig("trace.global.tags", "team:backend,owner:ml-platform")
  def llmSpan = llmObsSpan(Tags.LLMOBS_WORKFLOW_SPAN_KIND, "test-span")

  expect:
  def agentSpan = (AgentSpan) llmSpan.span
  def teamTag = agentSpan.getTag(LLMOBS_TAG_PREFIX + "team")
  teamTag == "backend"
  def ownerTag = agentSpan.getTag(LLMOBS_TAG_PREFIX + "owner")
  ownerTag == "ml-platform"
}
// Convenience overload: creates a span of the given kind and name with a null session id.
private LLMObsSpan llmObsSpan(String kind, name) {
  return llmObsSpan(kind, name, null)
}
// Creates the span under test for ML app "test-ml-app" and service "test-svc".
// The features read "test-env", "test-svc" and "v1" back as the env, service and version tags.
// NOTE(review): WellKnownTags argument order is presumably
// (runtimeId, hostname, env, service, version, language) - confirm against its constructor.
private LLMObsSpan llmObsSpan(String kind, name, String sessionId) {
  def wellKnownTags = new WellKnownTags("test-runtime-1", "host-1", "test-env", "test-svc", "v1", "java")
  return new DDLLMObsSpan(kind, name, "test-ml-app", sessionId, "test-svc", wellKnownTags)
}
}