Skip to content

Commit f8682b7

Browse files
authored
Merge pull request #11 from braintrustdata/ark/eval-task-output
Rework evals api
2 parents 3a887b7 + c1a69f4 commit f8682b7

13 files changed

Lines changed: 342 additions & 116 deletions

File tree

README.md

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -52,9 +52,9 @@ Function<String, String> getFoodType =
5252
var eval = braintrust.<String, String>evalBuilder()
5353
.name("java-eval-x-" + System.currentTimeMillis())
5454
.cases(
55-
EvalCase.of("asparagus", "vegetable"),
56-
EvalCase.of("banana", "fruit"))
57-
.task(getFoodType)
55+
DatasetCase.of("asparagus", "vegetable"),
56+
DatasetCase.of("banana", "fruit"))
57+
.taskFunction(getFoodType)
5858
.scorers(
5959
Scorer.of(
6060
"fruit_scorer",

examples/src/main/java/dev/braintrust/examples/ExperimentExample.java

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
import com.openai.models.ChatModel;
55
import com.openai.models.chat.completions.ChatCompletionCreateParams;
66
import dev.braintrust.Braintrust;
7-
import dev.braintrust.eval.EvalCase;
7+
import dev.braintrust.eval.DatasetCase;
88
import dev.braintrust.eval.Scorer;
99
import dev.braintrust.instrumentation.openai.BraintrustOpenAI;
1010
import java.util.function.Function;
@@ -37,10 +37,10 @@ public static void main(String[] args) throws Exception {
3737
// will append new cases to
3838
// the same experiment
3939
.cases(
40-
EvalCase.of("strawberry", "fruit"),
41-
EvalCase.of("asparagus", "vegetable"),
42-
EvalCase.of("apple", "fruit"),
43-
EvalCase.of("banana", "fruit"))
40+
DatasetCase.of("strawberry", "fruit"),
41+
DatasetCase.of("asparagus", "vegetable"),
42+
DatasetCase.of("apple", "fruit"),
43+
DatasetCase.of("banana", "fruit"))
4444
.taskFunction(getFoodType)
4545
.scorers(
4646
Scorer.of(
Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
package dev.braintrust.eval;
2+
3+
import java.util.List;
4+
import java.util.Optional;
5+
import javax.annotation.concurrent.NotThreadSafe;
6+
7+
/**
8+
* Datasets define the cases for evals. This interface provides a means of iterating through all
9+
* cases of a particular dataset.
10+
*
11+
* <p>The most common implementations are in-memory datasets, and datasets fetched from the
12+
* Braintrust API.
13+
*/
14+
public interface Dataset<INPUT, OUTPUT> {
15+
Cursor<DatasetCase<INPUT, OUTPUT>> openCursor();
16+
17+
String id();
18+
19+
String version();
20+
21+
@NotThreadSafe
22+
interface Cursor<CASE> extends AutoCloseable {
23+
/**
24+
* Fetch the next case. Returns empty if there are no more cases to fetch.
25+
*
26+
* <p>Implementations may make external requests to fetch data.
27+
*
28+
* <p>If this method is invoked after {@link #close()} an IllegalStateException will be
29+
* thrown
30+
*/
31+
Optional<CASE> next();
32+
33+
/** close all cursor resources */
34+
void close();
35+
}
36+
37+
/** Create an in-memory Dataset containing the provided cases. */
38+
@SafeVarargs
39+
static <INPUT, OUTPUT> Dataset<INPUT, OUTPUT> of(DatasetCase<INPUT, OUTPUT>... cases) {
40+
return new DatasetInMemoryImpl<>(List.of(cases));
41+
}
42+
}
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
package dev.braintrust.eval;
2+
3+
import java.util.List;
4+
import java.util.Map;
5+
import javax.annotation.Nonnull;
6+
7+
/** A single row in a dataset. */
8+
public record DatasetCase<INPUT, OUTPUT>(
9+
INPUT input,
10+
OUTPUT expected,
11+
@Nonnull List<String> tags,
12+
@Nonnull Map<String, Object> metadata) {
13+
public DatasetCase {
14+
if (!metadata.isEmpty()) {
15+
throw new RuntimeException("TODO: metadata support not yet implemented");
16+
}
17+
if (!tags.isEmpty()) {
18+
throw new RuntimeException("TODO: tags support not yet implemented");
19+
}
20+
}
21+
22+
public static <INPUT, OUTPUT> DatasetCase<INPUT, OUTPUT> of(INPUT input, OUTPUT expected) {
23+
return new DatasetCase<>(input, expected, List.of(), Map.of());
24+
}
25+
}
Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
package dev.braintrust.eval;
2+
3+
import java.util.List;
4+
import java.util.Optional;
5+
6+
/** A dataset held entirely in memory */
7+
class DatasetInMemoryImpl<INPUT, OUTPUT> implements Dataset<INPUT, OUTPUT> {
8+
private final List<DatasetCase<INPUT, OUTPUT>> cases;
9+
private final String id;
10+
11+
DatasetInMemoryImpl(List<DatasetCase<INPUT, OUTPUT>> cases) {
12+
this.cases = List.copyOf(cases);
13+
id = "in-memory-dataset<" + this.cases.hashCode() + ">";
14+
}
15+
16+
@Override
17+
public String id() {
18+
return id;
19+
}
20+
21+
@Override
22+
public String version() {
23+
return "0";
24+
}
25+
26+
@Override
27+
public Cursor<DatasetCase<INPUT, OUTPUT>> openCursor() {
28+
return new Cursor<>() {
29+
int nextIndex = 0;
30+
boolean closed = false;
31+
32+
@Override
33+
public Optional<DatasetCase<INPUT, OUTPUT>> next() {
34+
if (closed) {
35+
throw new IllegalStateException("this method may not be invoked after close");
36+
} else if (nextIndex < cases.size()) {
37+
return Optional.of(cases.get(nextIndex++));
38+
} else {
39+
return Optional.empty();
40+
}
41+
}
42+
43+
@Override
44+
public void close() {
45+
closed = true;
46+
}
47+
};
48+
}
49+
}

0 commit comments

Comments
 (0)