
Commit 420e7de

Changed: Use NewEval() instead of RunEval()
1 parent: 7da6d76

9 files changed: 52 additions & 36 deletions


README.md

Lines changed: 4 additions & 5 deletions
@@ -139,8 +139,8 @@ func main() {
 		log.Fatal(err)
 	}
 
-	// Define a reusable eval (task + scorers)
-	greetingEval := &eval.Eval[string, string]{
+	// Create an eval
+	e := braintrust.NewEval(client, &eval.Eval[string, string]{
 		Name: "greeting-experiment",
 		Task: eval.T(func(ctx context.Context, input string) (string, error) {
 			return "Hello " + input, nil
@@ -154,11 +154,10 @@ func main() {
 				return eval.S(score), nil
 			}),
 		},
-	}
+	})
 
 	// Run against a dataset
-	evaluator := braintrust.NewEvaluator[string, string](client)
-	_, err = evaluator.RunEval(ctx, greetingEval, eval.RunOpts[string, string]{
+	_, err = e.Run(ctx, eval.RunOpts[string, string]{
 		Dataset: eval.NewDataset([]eval.Case[string, string]{
 			{Input: "World", Expected: "Hello World"},
 			{Input: "Alice", Expected: "Hello Alice"},

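Net effect of the README change, reduced to the call shape: a minimal sketch, assuming a client from braintrust.New, with greetingEval and dataset standing in for the values built in the surrounding README code.

// Old shape, removed by this commit: a separate Evaluator runs the Eval.
evaluator := braintrust.NewEvaluator[string, string](client)
_, err = evaluator.RunEval(ctx, greetingEval, eval.RunOpts[string, string]{Dataset: dataset})

// New shape: NewEval binds the definition to the client; Run executes it.
e := braintrust.NewEval(client, greetingEval)
_, err = e.Run(ctx, eval.RunOpts[string, string]{Dataset: dataset})
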
client.go

Lines changed: 11 additions & 10 deletions
@@ -191,23 +191,24 @@ func (c *Client) Tracer(name string, opts ...oteltrace.TracerOption) oteltrace.T
 	return c.tracerProvider.Tracer(name, opts...)
 }
 
-// NewEvaluator creates a new evaluator for running evaluations with the same
-// input and output types.
+// NewEval creates a runnable [eval.Eval] by combining a client with an eval definition.
 //
 // Example:
 //
 //	client, _ := braintrust.New(tp)
-//	evaluator := braintrust.NewEvaluator[string, string](client)
-//
-//	// Define a reusable eval, then run it with different datasets
-//	myEval := &eval.Eval[string, string]{
-//		Name: "my-eval",
+//	e := braintrust.NewEval(client, &eval.Eval[string, string]{
+//		Name: "classify",
 //		Task: task,
 //		Scorers: scorers,
-//	}
-//	result, _ := evaluator.RunEval(ctx, myEval, eval.RunOpts[string, string]{
-//		Dataset: dataset,
 //	})
+//	result, _ := e.Run(ctx, eval.RunOpts[string, string]{Dataset: dataset})
+func NewEval[I, R any](client *Client, e *eval.Eval[I, R]) *eval.Eval[I, R] {
+	evaluator := eval.NewEvaluator[I, R](client.session, client.tracerProvider, client.API(), client.config.DefaultProjectName)
+	return eval.NewEval(evaluator, e)
+}
+
+// NewEvaluator creates a new evaluator for running evaluations with the same
+// input and output types.
 func NewEvaluator[I, R any](client *Client) *eval.Evaluator[I, R] {
 	return eval.NewEvaluator[I, R](client.session, client.tracerProvider, client.API(), client.config.DefaultProjectName)
 }

eval/eval.go

Lines changed: 19 additions & 2 deletions
@@ -107,7 +107,8 @@ type CaseProgress struct {
 }
 
 // Eval defines an evaluation: the task to run and the scorers to apply.
-// Run it via [Evaluator.RunEval] or register it with a remote eval server.
+// Create one with [braintrust.NewEval], then call [Eval.Run] to execute it
+// or pass it to a remote eval server.
 type Eval[I, R any] struct {
 	// Name is the eval name. Used as the default experiment name and as
 	// the registration key when registered with a remote eval server.
@@ -120,8 +121,24 @@ type Eval[I, R any] struct {
 	Scorers []Scorer[I, R]
 
 	// ProjectName is the Braintrust project for this eval.
-	// Optional; falls back to the Evaluator's default project.
+	// Optional; falls back to the default project from the client.
 	ProjectName string
+
+	// evaluator holds the infrastructure (session, tracer, API client)
+	// needed to run the eval. Set by NewEval / braintrust.NewEval.
+	evaluator *Evaluator[I, R]
+}
+
+// NewEval creates a runnable Eval by attaching an [Evaluator] as the default
+// runner. Users should call braintrust.NewEval rather than this directly.
+func NewEval[I, R any](evaluator *Evaluator[I, R], e *Eval[I, R]) *Eval[I, R] {
+	e.evaluator = evaluator
+	return e
+}
+
+// Run executes the evaluation using the default [Evaluator].
+func (e *Eval[I, R]) Run(ctx context.Context, opts RunOpts[I, R]) (*Result, error) {
+	return e.evaluator.Run(ctx, mergeOpts(e, opts))
 }
 
 // RunOpts configures a single evaluation run. These vary per invocation;

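Because Eval.Run passes the per-run options through mergeOpts, one bound Eval can be executed repeatedly with different run-level settings. A short sketch of that reuse (error handling elided), assuming client is a *braintrust.Client and classifyEval, fruitCases, and veggieCases are placeholder values:

e := braintrust.NewEval(client, classifyEval)

// First run: the Eval's Name doubles as the default experiment name.
_, err := e.Run(ctx, eval.RunOpts[string, string]{
	Dataset: eval.NewDataset(fruitCases),
})

// Second run: same task and scorers, different dataset and experiment name.
_, err = e.Run(ctx, eval.RunOpts[string, string]{
	Experiment: "classify-v2",
	Dataset:    eval.NewDataset(veggieCases),
})
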
eval/eval_integration_test.go

Lines changed: 4 additions & 3 deletions
@@ -728,8 +728,8 @@ func TestEval_NoProjectName(t *testing.T) {
 	assert.Contains(t, err.Error(), "project name is required")
 }
 
-// TestRunEval_Integration tests RunEval with a reusable Eval definition.
-func TestRunEval_Integration(t *testing.T) {
+// TestEvalRun_Integration tests Eval.Run with a reusable Eval definition.
+func TestEvalRun_Integration(t *testing.T) {
 	session, apiClient := setupIntegrationTest(t)
 	t.Parallel()
 
@@ -758,7 +758,8 @@ func TestRunEval_Integration(t *testing.T) {
 	defer func() { _ = tp.Shutdown(ctx) }()
 
 	evaluator := NewEvaluator[string, string](session, tp, apiClient, cfg.DefaultProjectName)
-	result, err := evaluator.RunEval(ctx, classify, RunOpts[string, string]{
+	e := NewEval(evaluator, classify)
+	result, err := e.Run(ctx, RunOpts[string, string]{
 		Dataset: NewDataset([]Case[string, string]{
 			{Input: "apple", Expected: "category-apple"},
 			{Input: "banana", Expected: "category-banana"},

eval/eval_runopts_test.go

Lines changed: 4 additions & 4 deletions
@@ -159,10 +159,10 @@ func TestMergeOpts_EvalReuse(t *testing.T) {
 	assert.Equal(t, "base-project", ev.ProjectName)
 }
 
-// TestRunEval_Success verifies that RunEval produces the same span structure as
-// the equivalent Run call. It uses the same testNewEval path as other unit tests
-// to avoid needing a real API client for experiment registration.
-func TestRunEval_Success(t *testing.T) {
+// TestEvalRun_Success verifies that Eval.Run produces the same span structure as
+// the equivalent Evaluator.Run call. It uses the same testNewEval path as other
+// unit tests to avoid needing a real API client for experiment registration.
+func TestEvalRun_Success(t *testing.T) {
 	t.Parallel()
 
 	task := T(func(_ context.Context, in testInput) (testOutput, error) {

eval/evaluator.go

Lines changed: 0 additions & 5 deletions
@@ -52,8 +52,3 @@ func (e *Evaluator[I, R]) Datasets() *DatasetAPI[I, R] {
 func (e *Evaluator[I, R]) Run(ctx context.Context, opts Opts[I, R]) (*Result, error) {
 	return run(ctx, opts, e.session, e.tracerProvider, e.api, e.defaultProjectName)
 }
-
-// RunEval executes an evaluation from a reusable [Eval] definition.
-func (e *Evaluator[I, R]) RunEval(ctx context.Context, ev *Eval[I, R], opts RunOpts[I, R]) (*Result, error) {
-	return run(ctx, mergeOpts(ev, opts), e.session, e.tracerProvider, e.api, e.defaultProjectName)
-}

eval/example_test.go

Lines changed: 5 additions & 6 deletions
@@ -77,8 +77,8 @@ func Example_evalDefinition() {
 		log.Fatal(err)
 	}
 
-	// Define a reusable eval (task + scorers + project)
-	classify := &eval.Eval[string, string]{
+	// Create a runnable eval
+	e := braintrust.NewEval(client, &eval.Eval[string, string]{
 		Name: "classify",
 		Task: eval.T(func(ctx context.Context, input string) (string, error) {
 			return input + "!", nil
@@ -92,11 +92,10 @@ func Example_evalDefinition() {
 			}),
 		},
 		ProjectName: "test-project",
-	}
+	})
 
-	// Run the eval with a specific dataset
-	evaluator := braintrust.NewEvaluator[string, string](client)
-	result, err := evaluator.RunEval(ctx, classify, eval.RunOpts[string, string]{
+	// Run it
+	result, err := e.Run(ctx, eval.RunOpts[string, string]{
 		Dataset: eval.NewDataset([]eval.Case[string, string]{
 			{Input: "hello", Expected: "hello!"},
 		}),
One file renamed without changes.

server/register.go

Lines changed: 5 additions & 1 deletion
@@ -207,8 +207,12 @@ func (r *registeredEvalImpl[I, R]) run(ctx context.Context, cfg *evalRunConfig)
 		}
 	}
 
+	// Create a per-request evaluator with the caller's session, not the
+	// default evaluator on the Eval, so traces are attributed to the user
+	// who triggered the request.
 	evaluator := eval.NewEvaluator[I, R](cfg.auth.session, tp, apiClient, r.projectName())
-	result, evalErr := evaluator.RunEval(evalCtx, r.def, eval.RunOpts[I, R]{
+	e := eval.NewEval(evaluator, r.def)
+	result, evalErr := e.Run(evalCtx, eval.RunOpts[I, R]{
 		Experiment: experimentName,
 		Dataset: dataset,
 		ProjectName: r.projectName(),

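A note on the rebinding above: as the eval/eval.go hunk shows, eval.NewEval mutates the Eval it receives (it sets the unexported evaluator field and returns the same pointer), which is what lets the server swap in a per-request runner for the registered definition. A small sketch of those semantics, with evaluatorA/evaluatorB as placeholder *eval.Evaluator values and def as a placeholder definition:

e1 := eval.NewEval(evaluatorA, def) // def is now bound to evaluatorA
e2 := eval.NewEval(evaluatorB, def) // rebinds: def (and e1) now run via evaluatorB

// e1, e2, and def all point at the same Eval; only the attached runner changed.
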