braintrust-sdk-go/examples/internal/functions/main.go at main · braintrustdata/braintrust-sdk-go · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
package main

import (
	"context"
	"fmt"
	"log"

	"go.opentelemetry.io/otel/sdk/trace"

	"github.com/braintrustdata/braintrust-sdk-go"
	"github.com/braintrustdata/braintrust-sdk-go/api"
	"github.com/braintrustdata/braintrust-sdk-go/api/datasets"
	functionsapi "github.com/braintrustdata/braintrust-sdk-go/api/functions"
	"github.com/braintrustdata/braintrust-sdk-go/api/projects"
	"github.com/braintrustdata/braintrust-sdk-go/eval"
)

// SentimentInput is the input payload for the sentiment example: the text to
// be classified. It is used both as the dataset case input and as the
// evaluator's input type parameter.
type SentimentInput struct {
	// Text is the passage whose sentiment is classified; serialized as "text".
	Text string `json:"text"`
}

// SentimentOutput is the classification produced for a SentimentInput. It is
// used both as the dataset's expected value and as the evaluator's output
// type parameter.
type SentimentOutput struct {
	// Sentiment is one of "positive", "negative", or "neutral"; serialized
	// as "sentiment".
	Sentiment string `json:"sentiment"`
}

// main runs the Functions() API walkthrough and exits with a non-zero status
// if any required step fails.
//
// All work lives in run so that run's deferred tracer-provider shutdown has
// already executed by the time log.Fatal is called: log.Fatal ends the
// process via os.Exit, which does not run deferred functions.
func main() {
	if err := run(context.Background()); err != nil {
		log.Fatal(err)
	}
}

// run executes the example end to end:
//
//  1. create a dataset of sentiment test cases,
//  2. create a prompt-backed task function,
//  3. create a prompt-backed scorer function,
//  4. load both through the evaluator's Functions() API and run an evaluation,
//  5. demonstrate loading functions with project/version/environment options,
//  6. clean up the created resources.
//
// It returns an error describing the first required step that failed. The
// error messages are kept verbatim (including capitalization) because they
// are this program's user-facing failure output. Failures in step 5 and in
// cleanup are only reported on stdout and do not make run fail.
func run(ctx context.Context) error {
	// Create tracer provider
	tp := trace.NewTracerProvider()
	defer func() {
		if err := tp.Shutdown(ctx); err != nil {
			log.Printf("Error shutting down tracer provider: %v", err)
		}
	}()

	// Initialize Braintrust client
	client, err := braintrust.New(tp,
		braintrust.WithProject("go-sdk-examples"),
	)
	if err != nil {
		return fmt.Errorf("Failed to create client: %w", err)
	}

	apiClient := client.API()

	// Step 1: Create a dataset with sentiment analysis test cases
	fmt.Println("=== Step 1: Creating dataset ===")
	datasetID, err := createSentimentDataset(ctx, apiClient)
	if err != nil {
		return fmt.Errorf("Failed to create dataset: %w", err)
	}
	fmt.Printf("✓ Created dataset: %s\n\n", datasetID)

	// Step 2: Create a task/prompt function for sentiment classification
	fmt.Println("=== Step 2: Creating task function ===")
	taskSlug := "sentiment-classifier-task"
	if err := createSentimentTask(ctx, apiClient, taskSlug); err != nil {
		return fmt.Errorf("Failed to create task: %w", err)
	}
	fmt.Printf("✓ Created task: %s\n\n", taskSlug)

	// Step 3: Create a scorer function to evaluate sentiment accuracy
	fmt.Println("=== Step 3: Creating scorer function ===")
	scorerSlug := "sentiment-accuracy-scorer"
	if err := createSentimentScorer(ctx, apiClient, scorerSlug); err != nil {
		return fmt.Errorf("Failed to create scorer: %w", err)
	}
	fmt.Printf("✓ Created scorer: %s\n\n", scorerSlug)

	// Step 4: Run evaluation using the Functions() API
	fmt.Println("=== Step 4: Running evaluation with Functions() API ===")
	evaluator := braintrust.NewEvaluator[SentimentInput, SentimentOutput](client)

	// Load dataset
	cases, err := evaluator.Datasets().Get(ctx, datasetID)
	if err != nil {
		return fmt.Errorf("Failed to load dataset: %w", err)
	}

	// Load task using Functions() API with FunctionOpts
	task, err := evaluator.Functions().Task(ctx, eval.FunctionOpts{
		Slug: taskSlug,
		// Project: "other-project",  // Optional: override project
		// Version: "abc123",          // Optional: pin to specific version
		// Environment: "production",  // Optional: specify environment
	})
	if err != nil {
		return fmt.Errorf("Failed to load task: %w", err)
	}
	fmt.Println("✓ Loaded task function")

	// Load scorer using Functions() API with FunctionOpts
	scorer, err := evaluator.Functions().Scorer(ctx, eval.FunctionOpts{
		Slug: scorerSlug,
	})
	if err != nil {
		return fmt.Errorf("Failed to load scorer: %w", err)
	}
	fmt.Println("✓ Loaded scorer function")

	// Run the evaluation
	result, err := evaluator.Run(ctx, eval.Opts[SentimentInput, SentimentOutput]{
		Experiment:  "sentiment-analysis-eval",
		Task:        task,
		Dataset:     cases,
		Scorers:     []eval.Scorer[SentimentInput, SentimentOutput]{scorer},
		Parallelism: 2,
	})
	if err != nil {
		return fmt.Errorf("Failed to run evaluation: %w", err)
	}

	fmt.Printf("\n✓ Evaluation complete!\n")
	fmt.Printf("  View results at: %s\n\n", result)

	// Step 5: Demonstrate using functions from different projects/environments.
	// These lookups are expected to fail in a fresh account, so their errors
	// are printed rather than returned.
	fmt.Println("=== Step 5: Demo - Loading functions with different options ===")

	// Example 1: Load from a different project
	fmt.Println("Example: Loading task from different project")
	_, err = evaluator.Functions().Task(ctx, eval.FunctionOpts{
		Slug:    taskSlug,
		Project: "my-other-project",
	})
	if err != nil {
		fmt.Printf("  (Expected error - project doesn't exist): %v\n", err)
	}

	// Example 2: Pin to specific version
	fmt.Println("Example: Pinning to specific version")
	_, err = evaluator.Functions().Task(ctx, eval.FunctionOpts{
		Slug:    taskSlug,
		Version: "5878bd218351fb8e",
	})
	if err != nil {
		fmt.Printf("  (Expected error - version doesn't exist): %v\n", err)
	}

	// Example 3: Load from staging environment
	fmt.Println("Example: Loading from staging environment")
	_, err = evaluator.Functions().Task(ctx, eval.FunctionOpts{
		Slug:        taskSlug,
		Environment: "staging",
	})
	if err != nil {
		fmt.Printf("  (Expected error - environment not configured): %v\n\n", err)
	}

	// Step 6: Cleanup. A cleanup failure is reported as a note only.
	fmt.Println("=== Step 6: Cleaning up ===")
	if err := cleanup(ctx, apiClient, datasetID, taskSlug, scorerSlug); err != nil {
		fmt.Printf("⚠ Cleanup note: %v\n", err)
	} else {
		fmt.Println("✓ Cleanup complete")
	}

	return nil
}

// createSentimentDataset creates the "sentiment-test-dataset" dataset under
// the "go-sdk-examples" project and fills it with five labelled examples.
//
// It returns the ID of the dataset. On any failure (project creation, dataset
// creation, or event insertion) it returns an empty ID and a wrapped error.
func createSentimentDataset(ctx context.Context, apiClient *api.API) (string, error) {
	proj, err := apiClient.Projects().Create(ctx, projects.CreateParams{
		Name: "go-sdk-examples",
	})
	if err != nil {
		return "", fmt.Errorf("failed to create project: %w", err)
	}

	ds, err := apiClient.Datasets().Create(ctx, datasets.CreateParams{
		ProjectID:   proj.ID,
		Name:        "sentiment-test-dataset",
		Description: "Test dataset for sentiment analysis with Functions() API example",
	})
	if err != nil {
		return "", fmt.Errorf("failed to create dataset: %w", err)
	}

	// Text/label pairs covering each of the three sentiment classes.
	samples := []struct {
		text      string
		sentiment string
	}{
		{"I love this product! It's amazing!", "positive"},
		{"This is terrible. Very disappointed.", "negative"},
		{"It's okay, nothing special.", "neutral"},
		{"Absolutely wonderful experience!", "positive"},
		{"Worst purchase ever.", "negative"},
	}

	// Turn each pair into a dataset event, preserving the order above.
	events := make([]datasets.Event, 0, len(samples))
	for _, s := range samples {
		events = append(events, datasets.Event{
			Input:    SentimentInput{Text: s.text},
			Expected: SentimentOutput{Sentiment: s.sentiment},
		})
	}

	err = apiClient.Datasets().InsertEvents(ctx, ds.ID, events)
	if err != nil {
		return "", fmt.Errorf("failed to insert events: %w", err)
	}

	return ds.ID, nil
}

// createSentimentTask registers a prompt-backed "task" function under slug in
// the "go-sdk-examples" project. The prompt asks gpt-4o-mini to classify the
// input text and answer as a JSON object with a "sentiment" field.
//
// Any function already stored under the same slug is looked up and deleted
// first. That lookup and delete are best-effort: their errors are ignored and
// creation is attempted regardless. A failure to obtain the project or to
// create the function is returned wrapped.
func createSentimentTask(ctx context.Context, apiClient *api.API, slug string) error {
	proj, err := apiClient.Projects().Create(ctx, projects.CreateParams{
		Name: "go-sdk-examples",
	})
	if err != nil {
		return fmt.Errorf("failed to create project: %w", err)
	}

	fns := apiClient.Functions()

	// Best-effort removal of a previous function with this slug; errors are
	// intentionally dropped here.
	previous, _ := fns.Query(ctx, functionsapi.QueryParams{
		ProjectName: "go-sdk-examples",
		Slug:        slug,
		Limit:       1,
	})
	if len(previous) > 0 {
		_ = fns.Delete(ctx, previous[0].ID)
	}

	// Chat messages: a system instruction plus the templated user text.
	messages := []map[string]any{
		{
			"role":    "system",
			"content": "You are a sentiment analyzer. Classify the sentiment of the given text as 'positive', 'negative', or 'neutral'. Respond with valid JSON in the format: {\"sentiment\": \"<classification>\"}",
		},
		{
			"role":    "user",
			"content": "{{input.text}}",
		},
	}

	// Model selection and sampling parameters.
	modelOptions := map[string]any{
		"model": "gpt-4o-mini",
		"params": map[string]any{
			"temperature":     0,
			"max_tokens":      20,
			"response_format": map[string]any{"type": "json_object"},
		},
	}

	params := functionsapi.CreateParams{
		ProjectID:    proj.ID,
		Name:         "Sentiment Classifier",
		Slug:         slug,
		FunctionType: "task",
		FunctionData: map[string]any{
			"type": "prompt",
		},
		PromptData: map[string]any{
			"prompt": map[string]any{
				"type":     "chat",
				"messages": messages,
			},
			"options": modelOptions,
		},
	}

	if _, err := fns.Create(ctx, params); err != nil {
		return fmt.Errorf("failed to create task: %w", err)
	}
	return nil
}

// createSentimentScorer registers a prompt-backed "scorer" function under slug
// in the "go-sdk-examples" project. The prompt asks gpt-4o-mini to compare the
// expected and actual sentiment and answer with a JSON object carrying a 0/1
// "score" and the name "sentiment_match".
//
// Any function already stored under the same slug is looked up and deleted
// first. That lookup and delete are best-effort: their errors are ignored and
// creation is attempted regardless. A failure to obtain the project or to
// create the function is returned wrapped.
func createSentimentScorer(ctx context.Context, apiClient *api.API, slug string) error {
	proj, err := apiClient.Projects().Create(ctx, projects.CreateParams{
		Name: "go-sdk-examples",
	})
	if err != nil {
		return fmt.Errorf("failed to create project: %w", err)
	}

	fns := apiClient.Functions()

	// Best-effort removal of a previous function with this slug; errors are
	// intentionally dropped here.
	lookup := functionsapi.QueryParams{
		ProjectName: "go-sdk-examples",
		Slug:        slug,
		Limit:       1,
	}
	stale, _ := fns.Query(ctx, lookup)
	if len(stale) > 0 {
		_ = fns.Delete(ctx, stale[0].ID)
	}

	// Grading instruction and the templated expected/actual comparison.
	systemMsg := map[string]any{
		"role":    "system",
		"content": "You are an evaluator that checks if sentiment classifications are correct. Return a score of 1 if the output sentiment matches the expected sentiment, 0 otherwise. Respond with valid JSON in the format: {\"score\": <0 or 1>, \"name\": \"sentiment_match\"}",
	}
	userMsg := map[string]any{
		"role":    "user",
		"content": "Expected sentiment: {{expected.sentiment}}\nActual sentiment: {{output.sentiment}}",
	}

	promptData := map[string]any{
		"prompt": map[string]any{
			"type":     "chat",
			"messages": []map[string]any{systemMsg, userMsg},
		},
		"options": map[string]any{
			"model": "gpt-4o-mini",
			"params": map[string]any{
				"temperature":     0,
				"max_tokens":      50,
				"response_format": map[string]any{"type": "json_object"},
			},
		},
	}

	_, err = fns.Create(ctx, functionsapi.CreateParams{
		ProjectID:    proj.ID,
		Name:         "Sentiment Accuracy Scorer",
		Slug:         slug,
		FunctionType: "scorer",
		FunctionData: map[string]any{"type": "prompt"},
		PromptData:   promptData,
	})
	if err != nil {
		return fmt.Errorf("failed to create scorer: %w", err)
	}
	return nil
}

// cleanup removes the resources this example created: the dataset identified
// by datasetID and the functions stored under taskSlug and scorerSlug in the
// "go-sdk-examples" project.
//
// Every deletion is attempted even if an earlier one fails, so a dataset that
// cannot be deleted does not leave the task and scorer functions behind.
// Function lookup and deletion are best-effort: their errors are ignored.
//
// It returns the wrapped dataset-deletion error, if any, and nil otherwise.
func cleanup(ctx context.Context, apiClient *api.API, datasetID, taskSlug, scorerSlug string) error {
	// Record a dataset failure instead of returning immediately; the
	// function deletions below must still run.
	var datasetErr error
	if err := apiClient.Datasets().Delete(ctx, datasetID); err != nil {
		datasetErr = fmt.Errorf("dataset cleanup skipped (this is normal): %w", err)
	}

	// Delete the task function, then the scorer function.
	functions := apiClient.Functions()
	for _, slug := range []string{taskSlug, scorerSlug} {
		// Best-effort: a failed lookup yields no results and is skipped; a
		// failed delete is ignored because there is nothing further to undo.
		existing, _ := functions.Query(ctx, functionsapi.QueryParams{
			ProjectName: "go-sdk-examples",
			Slug:        slug,
			Limit:       1,
		})
		if len(existing) > 0 {
			_ = functions.Delete(ctx, existing[0].ID)
		}
	}

	return datasetErr
}