Skip to content

Commit 8da0d6b

Browse files
committed
Merge branch 'main' of github.com:launchdarkly/java-core into mmccarthy/AIC-2854/java-enable-raw-prompt
2 parents b5c9cdf + a32c4fa commit 8da0d6b

12 files changed

Lines changed: 1012 additions & 10 deletions

File tree

lib/sdk/server-ai/src/main/java/com/launchdarkly/sdk/server/ai/AIAgentConfig.java

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,8 +30,9 @@ public final class AIAgentConfig extends AIConfig {
3030
String instructions,
3131
JudgeConfiguration judgeConfiguration,
3232
Map<String, Tool> tools,
33-
Supplier<LDAIConfigTracker> trackerFactory) {
34-
super(key, enabled, Mode.AGENT, model, provider, trackerFactory);
33+
Supplier<LDAIConfigTracker> trackerFactory,
34+
Evaluator evaluator) {
35+
super(key, enabled, Mode.AGENT, model, provider, trackerFactory, evaluator);
3536
this.instructions = instructions;
3637
this.judgeConfiguration = judgeConfiguration;
3738
this.tools = tools == null ? null : Collections.unmodifiableMap(tools);

lib/sdk/server-ai/src/main/java/com/launchdarkly/sdk/server/ai/AICompletionConfig.java

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -32,8 +32,9 @@ public final class AICompletionConfig extends AIConfig {
3232
List<Message> messages,
3333
JudgeConfiguration judgeConfiguration,
3434
Map<String, Tool> tools,
35-
Supplier<LDAIConfigTracker> trackerFactory) {
36-
super(key, enabled, Mode.COMPLETION, model, provider, trackerFactory);
35+
Supplier<LDAIConfigTracker> trackerFactory,
36+
Evaluator evaluator) {
37+
super(key, enabled, Mode.COMPLETION, model, provider, trackerFactory, evaluator);
3738
this.messages = messages == null ? null : Collections.unmodifiableList(messages);
3839
this.judgeConfiguration = judgeConfiguration;
3940
this.tools = tools == null ? null : Collections.unmodifiableMap(tools);

lib/sdk/server-ai/src/main/java/com/launchdarkly/sdk/server/ai/AIConfig.java

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,20 +24,23 @@ public abstract class AIConfig {
2424
private final Model model;
2525
private final Provider provider;
2626
private final Supplier<LDAIConfigTracker> trackerFactory;
27+
private final Evaluator evaluator;
2728

2829
AIConfig(
2930
String key,
3031
boolean enabled,
3132
Mode mode,
3233
Model model,
3334
Provider provider,
34-
Supplier<LDAIConfigTracker> trackerFactory) {
35+
Supplier<LDAIConfigTracker> trackerFactory,
36+
Evaluator evaluator) {
3537
this.key = key;
3638
this.enabled = enabled;
3739
this.mode = mode;
3840
this.model = model;
3941
this.provider = provider;
4042
this.trackerFactory = Objects.requireNonNull(trackerFactory, "trackerFactory");
43+
this.evaluator = Objects.requireNonNull(evaluator, "evaluator");
4144
}
4245

4346
/**
@@ -102,4 +105,17 @@ public Provider getProvider() {
102105
public LDAIConfigTracker createTracker() {
103106
return trackerFactory.get();
104107
}
108+
109+
/**
110+
* Returns the evaluator that coordinates judge execution for this configuration.
111+
* <p>
112+
* For {@link AIJudgeConfig} this is always {@link Evaluator#noop()}. For
113+
* {@link AICompletionConfig} and {@link AIAgentConfig} it is the evaluator supplied at
114+
* construction time (also {@link Evaluator#noop()} unless a custom one is wired in).
115+
*
116+
* @return the evaluator, never {@code null}
117+
*/
118+
public Evaluator getEvaluator() {
119+
return evaluator;
120+
}
105121
}

lib/sdk/server-ai/src/main/java/com/launchdarkly/sdk/server/ai/AIJudgeConfig.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ public final class AIJudgeConfig extends AIConfig {
2929
List<Message> messages,
3030
String evaluationMetricKey,
3131
Supplier<LDAIConfigTracker> trackerFactory) {
32-
super(key, enabled, Mode.JUDGE, model, provider, trackerFactory);
32+
super(key, enabled, Mode.JUDGE, model, provider, trackerFactory, Evaluator.noop());
3333
this.messages = messages == null ? null : Collections.unmodifiableList(messages);
3434
this.evaluationMetricKey = evaluationMetricKey;
3535
}
Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,93 @@
1+
package com.launchdarkly.sdk.server.ai;
2+
3+
import com.launchdarkly.logging.LDLogger;
4+
import com.launchdarkly.sdk.server.ai.datamodel.LDAIConfigTypes.JudgeConfiguration;
5+
import com.launchdarkly.sdk.server.ai.datamodel.LDAITrackingTypes.JudgeResult;
6+
7+
import java.util.ArrayList;
8+
import java.util.Collections;
9+
import java.util.HashMap;
10+
import java.util.List;
11+
import java.util.Map;
12+
import java.util.concurrent.CompletableFuture;
13+
14+
/**
15+
* Coordinates evaluation of an AI Config output by running a set of {@link Judge} instances.
16+
* <p>
17+
* An {@code Evaluator} is attached to an {@link AICompletionConfig} or {@link AIAgentConfig} and
18+
* invoked by managed AI types (plan 4). In v1.0, the evaluator returned by the config retrieval
19+
* methods is always a noop that returns an empty list immediately.
20+
* <p>
21+
* Instances are immutable and thread-safe.
22+
*/
23+
public final class Evaluator {
24+
private static final Evaluator NOOP = new Evaluator();
25+
26+
private final Map<String, Judge> judges;
27+
private final JudgeConfiguration judgeConfiguration;
28+
private final LDLogger logger;
29+
30+
private Evaluator() {
31+
this.judges = Collections.emptyMap();
32+
this.judgeConfiguration = null;
33+
this.logger = null;
34+
}
35+
36+
/**
37+
* Constructs an evaluator with the given judges and configuration.
38+
*
39+
* @param judges a map from judge config key to {@link Judge} instance; a {@code null} value is
40+
* treated as an empty map
41+
* @param judgeConfiguration the judge configuration listing which judges to run and their sampling
42+
* rates
43+
* @param logger the logger
44+
*/
45+
public Evaluator(Map<String, Judge> judges, JudgeConfiguration judgeConfiguration, LDLogger logger) {
46+
this.judges = judges != null
47+
? Collections.unmodifiableMap(new HashMap<>(judges))
48+
: Collections.emptyMap();
49+
this.judgeConfiguration = judgeConfiguration;
50+
this.logger = logger;
51+
}
52+
53+
/**
54+
* Returns the shared noop evaluator, which immediately returns an empty result list without
55+
* logging any warnings.
56+
*
57+
* @return the noop singleton, never {@code null}
58+
*/
59+
public static Evaluator noop() {
60+
return NOOP;
61+
}
62+
63+
/**
64+
* Runs all configured judges against the given input/output pair and returns their results.
65+
* <p>
66+
* Judges are run sequentially in the order specified by the {@link JudgeConfiguration}.
67+
* Returns an empty list immediately when no judge configuration is present.
68+
* Judges referenced in the configuration but absent from the judges map are skipped with a
69+
* warning; this is not an error.
70+
* <p>
71+
* This method does NOT call {@code trackJudgeResult} — that is the caller's responsibility.
72+
*
73+
* @param input the message history or prompt that was sent to the model
74+
* @param output the model's response to evaluate
75+
* @return a completed future holding the list of judge results; never {@code null}
76+
*/
77+
public CompletableFuture<List<JudgeResult>> evaluate(String input, String output) {
78+
if (judgeConfiguration == null) {
79+
return CompletableFuture.completedFuture(Collections.emptyList());
80+
}
81+
82+
List<JudgeResult> results = new ArrayList<>();
83+
for (JudgeConfiguration.Judge entry : judgeConfiguration.getJudges()) {
84+
Judge judge = judges.get(entry.getKey());
85+
if (judge == null) {
86+
if (logger != null) logger.warn("Evaluator: no judge found for key '{}', skipping", entry.getKey());
87+
continue;
88+
}
89+
results.add(judge.evaluate(input, output, entry.getSamplingRate()));
90+
}
91+
return CompletableFuture.completedFuture(results);
92+
}
93+
}
Lines changed: 222 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,222 @@
1+
package com.launchdarkly.sdk.server.ai;
2+
3+
import com.launchdarkly.logging.LDLogger;
4+
import com.launchdarkly.sdk.server.ai.datamodel.LDAIConfigTypes.Message;
5+
import com.launchdarkly.sdk.server.ai.datamodel.LDAITrackingTypes.JudgeResult;
6+
7+
import java.util.Arrays;
8+
import java.util.Collections;
9+
import java.util.HashMap;
10+
import java.util.List;
11+
import java.util.Map;
12+
import java.util.concurrent.ThreadLocalRandom;
13+
import java.util.stream.Collectors;
14+
15+
/**
16+
* Evaluates an AI model output against a judge prompt, returning a scored {@link JudgeResult}.
17+
* <p>
18+
* A {@code Judge} wraps an {@link AIJudgeConfig} and a {@link Runner}. Each call to
19+
* {@link #evaluate} or {@link #evaluateMessages} invokes the runner with a formatted evaluation
20+
* prompt and parses the structured {@code {score, reasoning}} response. Evaluation can be sampled
21+
* to reduce cost: pass a {@code samplingRate} of {@code 0.0} to always skip, or {@code 1.0} to
22+
* always run.
23+
* <p>
24+
* Instances are immutable and thread-safe.
25+
*/
26+
public final class Judge {
27+
/**
28+
* JSON-Schema fragment sent to the runner as the {@code outputType}, requesting structured
29+
* {@code {score, reasoning}} output.
30+
*/
31+
private static final Map<String, Object> EVALUATION_SCHEMA;
32+
static {
33+
Map<String, Object> scoreSchema = new HashMap<>();
34+
scoreSchema.put("type", "number");
35+
scoreSchema.put("minimum", 0);
36+
scoreSchema.put("maximum", 1);
37+
scoreSchema.put("description", "Score between 0.0 and 1.0.");
38+
39+
Map<String, Object> reasoningSchema = new HashMap<>();
40+
reasoningSchema.put("type", "string");
41+
reasoningSchema.put("description", "Reasoning behind the score.");
42+
43+
Map<String, Object> properties = new HashMap<>();
44+
properties.put("score", Collections.unmodifiableMap(scoreSchema));
45+
properties.put("reasoning", Collections.unmodifiableMap(reasoningSchema));
46+
47+
Map<String, Object> schema = new HashMap<>();
48+
schema.put("type", "object");
49+
schema.put("properties", Collections.unmodifiableMap(properties));
50+
schema.put("required", Arrays.asList("score", "reasoning"));
51+
schema.put("additionalProperties", false);
52+
53+
EVALUATION_SCHEMA = Collections.unmodifiableMap(schema);
54+
}
55+
56+
private final AIJudgeConfig config;
57+
private final Runner runner;
58+
private final LDLogger logger;
59+
60+
/**
61+
* Constructs a judge.
62+
*
63+
* @param config the judge AI Config
64+
* @param runner the runner to invoke
65+
* @param logger the logger
66+
*/
67+
public Judge(AIJudgeConfig config, Runner runner, LDLogger logger) {
68+
this.config = config;
69+
this.runner = runner;
70+
this.logger = logger;
71+
}
72+
73+
/**
74+
* Evaluates the given input/output pair, always running (sampling rate {@code 1.0}).
75+
*
76+
* @param input the message history or prompt that was sent to the model
77+
* @param output the model's response to evaluate
78+
* @return the evaluation result; never {@code null}
79+
*/
80+
public JudgeResult evaluate(String input, String output) {
81+
return evaluate(input, output, 1.0);
82+
}
83+
84+
/**
85+
* Evaluates the given input/output pair, subject to the given sampling rate.
86+
*
87+
* @param input the message history or prompt that was sent to the model
88+
* @param output the model's response to evaluate
89+
* @param samplingRate the fraction of evaluations to actually run; {@code 0.0} always skips,
90+
* {@code 1.0} always runs
91+
* @return the evaluation result; never {@code null}
92+
*/
93+
public JudgeResult evaluate(String input, String output, double samplingRate) {
94+
if (samplingRate <= 0.0) {
95+
return JudgeResult.builder()
96+
.sampled(false)
97+
.success(false)
98+
.judgeConfigKey(config.getKey())
99+
.metricKey(config.getEvaluationMetricKey())
100+
.build();
101+
}
102+
if (ThreadLocalRandom.current().nextDouble() > samplingRate) {
103+
return JudgeResult.builder()
104+
.sampled(false)
105+
.success(false)
106+
.judgeConfigKey(config.getKey())
107+
.metricKey(config.getEvaluationMetricKey())
108+
.build();
109+
}
110+
111+
try {
112+
String formatted = "MESSAGE HISTORY:\n" + input + "\n\nRESPONSE TO EVALUATE:\n" + output;
113+
LDAIConfigTracker tracker = config.createTracker();
114+
115+
RunnerResult result = tracker.trackMetricsOf(RunnerResult::getMetrics, () -> runner.run(formatted, EVALUATION_SCHEMA));
116+
117+
Map<String, Object> parsed = result.getParsed();
118+
if (parsed == null) {
119+
if (logger != null) logger.warn("Judge {}: runner returned null parsed output", config.getKey());
120+
return JudgeResult.builder()
121+
.sampled(true)
122+
.success(false)
123+
.judgeConfigKey(config.getKey())
124+
.metricKey(config.getEvaluationMetricKey())
125+
.build();
126+
}
127+
128+
Object scoreRaw = parsed.get("score");
129+
if (!(scoreRaw instanceof Number)) {
130+
if (logger != null) logger.warn("Judge {}: parsed output missing numeric score", config.getKey());
131+
return JudgeResult.builder()
132+
.sampled(true)
133+
.success(false)
134+
.judgeConfigKey(config.getKey())
135+
.metricKey(config.getEvaluationMetricKey())
136+
.build();
137+
}
138+
double score = ((Number) scoreRaw).doubleValue();
139+
if (!Double.isFinite(score) || score < 0.0 || score > 1.0) {
140+
if (logger != null) logger.warn("Judge {}: score {} is outside [0.0, 1.0]", config.getKey(), score);
141+
return JudgeResult.builder()
142+
.sampled(true)
143+
.success(false)
144+
.judgeConfigKey(config.getKey())
145+
.metricKey(config.getEvaluationMetricKey())
146+
.build();
147+
}
148+
149+
JudgeResult.Builder resultBuilder = JudgeResult.builder()
150+
.sampled(true)
151+
.success(true)
152+
.judgeConfigKey(config.getKey())
153+
.metricKey(config.getEvaluationMetricKey())
154+
.score(score);
155+
156+
Object reasoningRaw = parsed.get("reasoning");
157+
if (reasoningRaw instanceof String) {
158+
resultBuilder.reasoning((String) reasoningRaw);
159+
} else if (reasoningRaw != null) {
160+
if (logger != null) logger.warn("Judge {}: reasoning is not a string, ignoring", config.getKey());
161+
}
162+
163+
return resultBuilder.build();
164+
} catch (Exception ex) {
165+
return JudgeResult.builder()
166+
.sampled(true)
167+
.success(false)
168+
.judgeConfigKey(config.getKey())
169+
.metricKey(config.getEvaluationMetricKey())
170+
.errorMessage(ex.getMessage())
171+
.build();
172+
}
173+
}
174+
175+
/**
176+
* Evaluates a message list and runner response, always running (sampling rate {@code 1.0}).
177+
* <p>
178+
* Messages are formatted as {@code role: content} lines, joined by newlines.
179+
*
180+
* @param messages the messages that were sent to the model
181+
* @param response the runner result whose {@link RunnerResult#getContent() content} is evaluated
182+
* @return the evaluation result; never {@code null}
183+
*/
184+
public JudgeResult evaluateMessages(List<Message> messages, RunnerResult response) {
185+
return evaluateMessages(messages, response, 1.0);
186+
}
187+
188+
/**
189+
* Evaluates a message list and runner response, subject to the given sampling rate.
190+
* <p>
191+
* Messages are formatted as {@code role: content} lines, joined by newlines.
192+
*
193+
* @param messages the messages that were sent to the model
194+
* @param response the runner result whose {@link RunnerResult#getContent() content} is evaluated
195+
* @param samplingRate the fraction of evaluations to actually run
196+
* @return the evaluation result; never {@code null}
197+
*/
198+
public JudgeResult evaluateMessages(List<Message> messages, RunnerResult response, double samplingRate) {
199+
String formattedMessages = messages == null ? "" : messages.stream()
200+
.map(m -> m.getRole().getWireValue() + ": " + m.getContent())
201+
.collect(Collectors.joining("\n"));
202+
return evaluate(formattedMessages, response == null ? "" : response.getContent(), samplingRate);
203+
}
204+
205+
/**
206+
* Returns the judge AI Config this instance was constructed with.
207+
*
208+
* @return the judge config
209+
*/
210+
public AIJudgeConfig getConfig() {
211+
return config;
212+
}
213+
214+
/**
215+
* Returns the runner this instance was constructed with.
216+
*
217+
* @return the runner
218+
*/
219+
public Runner getRunner() {
220+
return runner;
221+
}
222+
}

0 commit comments

Comments
 (0)