launchdarkly
diff --git a/‎lib/sdk/server-ai/src/main/java/com/launchdarkly/sdk/server/ai/AIAgentConfig.java‎
Lines changed: 3 additions & 2 deletions b/‎lib/sdk/server-ai/src/main/java/com/launchdarkly/sdk/server/ai/AIAgentConfig.java‎
Lines changed: 3 additions & 2 deletions
diff --git a/‎lib/sdk/server-ai/src/main/java/com/launchdarkly/sdk/server/ai/AICompletionConfig.java‎
Lines changed: 3 additions & 2 deletions b/‎lib/sdk/server-ai/src/main/java/com/launchdarkly/sdk/server/ai/AICompletionConfig.java‎
Lines changed: 3 additions & 2 deletions
diff --git a/‎lib/sdk/server-ai/src/main/java/com/launchdarkly/sdk/server/ai/AIConfig.java‎
Lines changed: 17 additions & 1 deletion b/‎lib/sdk/server-ai/src/main/java/com/launchdarkly/sdk/server/ai/AIConfig.java‎
Lines changed: 17 additions & 1 deletion
diff --git a/‎lib/sdk/server-ai/src/main/java/com/launchdarkly/sdk/server/ai/AIJudgeConfig.java‎
Lines changed: 1 addition & 1 deletion b/‎lib/sdk/server-ai/src/main/java/com/launchdarkly/sdk/server/ai/AIJudgeConfig.java‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎lib/sdk/server-ai/src/main/java/com/launchdarkly/sdk/server/ai/Evaluator.java‎
Lines changed: 93 additions & 0 deletions b/‎lib/sdk/server-ai/src/main/java/com/launchdarkly/sdk/server/ai/Evaluator.java‎
Lines changed: 93 additions & 0 deletions
diff --git a/‎lib/sdk/server-ai/src/main/java/com/launchdarkly/sdk/server/ai/Judge.java‎
Lines changed: 195 additions & 0 deletions b/‎lib/sdk/server-ai/src/main/java/com/launchdarkly/sdk/server/ai/Judge.java‎
Lines changed: 195 additions & 0 deletions
@@ -30,8 +30,9 @@ public final class AIAgentConfig extends AIConfig {
       String instructions,
       JudgeConfiguration judgeConfiguration,
       Map<String, Tool> tools,
-      Supplier<LDAIConfigTracker> trackerFactory) {
-    super(key, enabled, Mode.AGENT, model, provider, trackerFactory);
+      Supplier<LDAIConfigTracker> trackerFactory,
+      Evaluator evaluator) {
+    super(key, enabled, Mode.AGENT, model, provider, trackerFactory, evaluator);
     this.instructions = instructions;
     this.judgeConfiguration = judgeConfiguration;
     this.tools = tools == null ? null : Collections.unmodifiableMap(tools);
 
@@ -32,8 +32,9 @@ public final class AICompletionConfig extends AIConfig {
       List<Message> messages,
       JudgeConfiguration judgeConfiguration,
       Map<String, Tool> tools,
-      Supplier<LDAIConfigTracker> trackerFactory) {
-    super(key, enabled, Mode.COMPLETION, model, provider, trackerFactory);
+      Supplier<LDAIConfigTracker> trackerFactory,
+      Evaluator evaluator) {
+    super(key, enabled, Mode.COMPLETION, model, provider, trackerFactory, evaluator);
     this.messages = messages == null ? null : Collections.unmodifiableList(messages);
     this.judgeConfiguration = judgeConfiguration;
     this.tools = tools == null ? null : Collections.unmodifiableMap(tools);
 
@@ -24,20 +24,23 @@ public abstract class AIConfig {
   private final Model model;
   private final Provider provider;
   private final Supplier<LDAIConfigTracker> trackerFactory;
+  private final Evaluator evaluator;
 
   AIConfig(
       String key,
       boolean enabled,
       Mode mode,
       Model model,
       Provider provider,
-      Supplier<LDAIConfigTracker> trackerFactory) {
+      Supplier<LDAIConfigTracker> trackerFactory,
+      Evaluator evaluator) {
     this.key = key;
     this.enabled = enabled;
     this.mode = mode;
     this.model = model;
     this.provider = provider;
     this.trackerFactory = Objects.requireNonNull(trackerFactory, "trackerFactory");
+    this.evaluator = Objects.requireNonNull(evaluator, "evaluator");
   }
 
   /**
@@ -102,4 +105,17 @@ public Provider getProvider() {
   public LDAIConfigTracker createTracker() {
     return trackerFactory.get();
   }
+
+  /**
+   * Returns the {@link Evaluator} that coordinates judge execution for this configuration.
+   * <p>
+   * The value is the one handed to the constructor, which rejects {@code null}.
+   * {@link AIJudgeConfig} always passes {@link Evaluator#noop()}; {@link AICompletionConfig} and
+   * {@link AIAgentConfig} forward the evaluator supplied at construction time (also
+   * {@link Evaluator#noop()} unless a custom one is wired in).
+   *
+   * @return the evaluator, never {@code null}
+   */
+  public Evaluator getEvaluator() {
+    return this.evaluator;
+  }
 }
@@ -29,7 +29,7 @@ public final class AIJudgeConfig extends AIConfig {
       List<Message> messages,
       String evaluationMetricKey,
       Supplier<LDAIConfigTracker> trackerFactory) {
-    super(key, enabled, Mode.JUDGE, model, provider, trackerFactory);
+    super(key, enabled, Mode.JUDGE, model, provider, trackerFactory, Evaluator.noop());
     this.messages = messages == null ? null : Collections.unmodifiableList(messages);
     this.evaluationMetricKey = evaluationMetricKey;
   }
 
@@ -0,0 +1,93 @@
+package com.launchdarkly.sdk.server.ai;
+
+import com.launchdarkly.logging.LDLogger;
+import com.launchdarkly.sdk.server.ai.datamodel.LDAIConfigTypes.JudgeConfiguration;
+import com.launchdarkly.sdk.server.ai.datamodel.LDAITrackingTypes.JudgeResult;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Objects;
+import java.util.concurrent.CompletableFuture;
+
+/**
+ * Coordinates evaluation of an AI Config output by running a set of {@link Judge} instances.
+ * <p>
+ * An {@code Evaluator} is attached to an {@link AICompletionConfig} or {@link AIAgentConfig} and
+ * is intended to be invoked by the managed AI types. In v1.0, the evaluator returned by the
+ * config retrieval methods is always a noop that returns an empty list immediately.
+ * <p>
+ * Instances are immutable and thread-safe: the judges map is snapshotted at construction time, so
+ * later changes to the map passed by the caller have no effect on this evaluator.
+ */
+public final class Evaluator {
+  private static final Evaluator NOOP = new Evaluator();
+
+  private final Map<String, Judge> judges;
+  private final JudgeConfiguration judgeConfiguration;
+  private final LDLogger logger;
+  private final boolean isNoop;
+
+  /** Builds the noop instance: no judges, no configuration, no logger. */
+  private Evaluator() {
+    this.judges = Collections.emptyMap();
+    this.judgeConfiguration = null;
+    this.logger = null;
+    this.isNoop = true;
+  }
+
+  /**
+   * Constructs an evaluator with the given judges and configuration.
+   * <p>
+   * The {@code judges} map is copied; mutating it after this constructor returns does not change
+   * which judges this evaluator runs.
+   *
+   * @param judges a map from judge config key to {@link Judge} instance; must not be {@code null}
+   * @param judgeConfiguration the judge configuration listing which judges to run and their sampling
+   *     rates; must not be {@code null}
+   * @param logger the logger; must not be {@code null}
+   * @throws NullPointerException if any argument is {@code null}
+   */
+  public Evaluator(Map<String, Judge> judges, JudgeConfiguration judgeConfiguration, LDLogger logger) {
+    Objects.requireNonNull(judges, "judges");
+    // Defensive snapshot: holding the caller's map by reference would let the caller change this
+    // evaluator's behaviour after construction, breaking the documented immutability.
+    this.judges = Collections.unmodifiableMap(new HashMap<>(judges));
+    this.judgeConfiguration = Objects.requireNonNull(judgeConfiguration, "judgeConfiguration");
+    this.logger = Objects.requireNonNull(logger, "logger");
+    this.isNoop = false;
+  }
+
+  /**
+   * Returns the shared noop evaluator, which immediately returns an empty result list without
+   * logging any warnings.
+   *
+   * @return the noop singleton, never {@code null}
+   */
+  public static Evaluator noop() {
+    return NOOP;
+  }
+
+  /**
+   * Runs all configured judges against the given input/output pair and returns their results.
+   * <p>
+   * When this is the noop evaluator, returns a completed future holding an empty list immediately.
+   * Otherwise, judges are run sequentially, on the calling thread, in the order specified by the
+   * {@link JudgeConfiguration}; the returned future is already complete when this method returns.
+   * Judges referenced in the configuration but absent from the judges map are skipped with a
+   * warning; this is not an error.
+   * <p>
+   * This method does NOT call {@code trackJudgeResult} — that is the caller's responsibility.
+   *
+   * @param input the message history or prompt that was sent to the model
+   * @param output the model's response to evaluate
+   * @return a completed future holding the list of judge results; never {@code null}
+   */
+  public CompletableFuture<List<JudgeResult>> evaluate(String input, String output) {
+    if (isNoop) {
+      return CompletableFuture.completedFuture(Collections.emptyList());
+    }
+
+    List<JudgeResult> results = new ArrayList<>();
+    for (JudgeConfiguration.Judge entry : judgeConfiguration.getJudges()) {
+      Judge judge = judges.get(entry.getKey());
+      if (judge == null) {
+        // A configured judge with no registered instance is tolerated, not treated as a failure.
+        logger.warn("Evaluator: no judge found for key '{}', skipping", entry.getKey());
+        continue;
+      }
+      results.add(judge.evaluate(input, output, entry.getSamplingRate()));
+    }
+    return CompletableFuture.completedFuture(results);
+  }
+}
@@ -0,0 +1,195 @@
+package com.launchdarkly.sdk.server.ai;
+
+import com.launchdarkly.logging.LDLogger;
+import com.launchdarkly.sdk.server.ai.datamodel.LDAIConfigTypes.Message;
+import com.launchdarkly.sdk.server.ai.datamodel.LDAITrackingTypes.JudgeResult;
+
+import java.util.List;
+import java.util.Map;
+import java.util.Objects;
+import java.util.concurrent.ThreadLocalRandom;
+import java.util.stream.Collectors;
+
+/**
+ * Evaluates an AI model output against a judge prompt, returning a scored {@link JudgeResult}.
+ * <p>
+ * A {@code Judge} wraps an {@link AIJudgeConfig} and a {@link Runner}. Each call to
+ * {@link #evaluate} or {@link #evaluateMessages} invokes the runner with a formatted evaluation
+ * prompt and parses the structured {@code {score, reasoning}} response. Evaluation can be sampled
+ * to reduce cost: pass a {@code samplingRate} of {@code 0.0} to always skip, or {@code 1.0} to
+ * always run.
+ * <p>
+ * Instances are immutable and thread-safe.
+ */
+public final class Judge {
+  /**
+   * JSON-Schema fragment sent to the runner as the {@code outputType}, requesting structured
+   * {@code {score, reasoning}} output.
+   */
+  private static final Map<String, Object> EVALUATION_SCHEMA = Map.of(
+      "type", "object",
+      "properties", Map.of(
+          "score", Map.of("type", "number"),
+          "reasoning", Map.of("type", "string")),
+      "required", List.of("score", "reasoning"));
+
+  private final AIJudgeConfig config;
+  private final Runner runner;
+  private final LDLogger logger;
+
+  /**
+   * Constructs a judge.
+   *
+   * @param config the judge AI Config; must not be {@code null}
+   * @param runner the runner to invoke; must not be {@code null}
+   * @param logger the logger; must not be {@code null}
+   * @throws NullPointerException if any argument is {@code null}
+   */
+  public Judge(AIJudgeConfig config, Runner runner, LDLogger logger) {
+    this.config = Objects.requireNonNull(config, "config");
+    this.runner = Objects.requireNonNull(runner, "runner");
+    this.logger = Objects.requireNonNull(logger, "logger");
+  }
+
+  /**
+   * Evaluates the given input/output pair, always running (sampling rate {@code 1.0}).
+   *
+   * @param input the message history or prompt that was sent to the model
+   * @param output the model's response to evaluate
+   * @return the evaluation result; never {@code null}
+   */
+  public JudgeResult evaluate(String input, String output) {
+    return evaluate(input, output, 1.0);
+  }
+
+  /**
+   * Evaluates the given input/output pair, subject to the given sampling rate.
+   * <p>
+   * A uniformly distributed value in {@code [0.0, 1.0)} is drawn per call; the evaluation runs only
+   * when that value is below {@code samplingRate}. A skipped evaluation yields a result with
+   * {@code sampled = false} and {@code success = false}. Every failure after sampling (tracker or
+   * runner exception, missing parsed output, missing or out-of-range score) yields a result with
+   * {@code sampled = true} and {@code success = false} rather than an exception.
+   *
+   * @param input the message history or prompt that was sent to the model
+   * @param output the model's response to evaluate
+   * @param samplingRate the fraction of evaluations to actually run; {@code 0.0} always skips,
+   *     {@code 1.0} always runs
+   * @return the evaluation result; never {@code null}
+   */
+  public JudgeResult evaluate(String input, String output, double samplingRate) {
+    if (ThreadLocalRandom.current().nextDouble() >= samplingRate) {
+      return resultBuilder(false, false).build();
+    }
+
+    String formatted = "MESSAGE HISTORY:\n" + input + "\n\nRESPONSE TO EVALUATE:\n" + output;
+
+    RunnerResult result;
+    try {
+      // Tracker creation sits inside the try so a failing tracker factory is reported the same
+      // way as a failing runner instead of escaping to the caller.
+      LDAIConfigTracker tracker = config.createTracker();
+      result = tracker.trackMetricsOf(RunnerResult::getMetrics, () -> runner.run(formatted, EVALUATION_SCHEMA));
+    } catch (Exception ex) {
+      // getMessage() may be null (e.g. a bare NullPointerException); fall back to toString() so
+      // the failure result and the log always carry a description.
+      String message = ex.getMessage() != null ? ex.getMessage() : ex.toString();
+      logger.warn("Judge {}: evaluation failed: {}", config.getKey(), message);
+      return resultBuilder(true, false).errorMessage(message).build();
+    }
+
+    Map<String, Object> parsed = result == null ? null : result.getParsed();
+    if (parsed == null) {
+      logger.warn("Judge {}: runner returned null parsed output", config.getKey());
+      return resultBuilder(true, false).build();
+    }
+
+    Object scoreRaw = parsed.get("score");
+    if (!(scoreRaw instanceof Number)) {
+      logger.warn("Judge {}: parsed output missing numeric score", config.getKey());
+      return resultBuilder(true, false).build();
+    }
+    double score = ((Number) scoreRaw).doubleValue();
+    // NaN compares false against both bounds, so it must be rejected explicitly; otherwise it
+    // would slip through the range check and be reported as a successful score.
+    if (Double.isNaN(score) || score < 0.0 || score > 1.0) {
+      logger.warn("Judge {}: score {} is outside [0.0, 1.0]", config.getKey(), score);
+      return resultBuilder(true, false).build();
+    }
+
+    JudgeResult.Builder success = resultBuilder(true, true).score(score);
+
+    // Reasoning is optional: a non-string value is dropped with a warning, not treated as failure.
+    Object reasoningRaw = parsed.get("reasoning");
+    if (reasoningRaw instanceof String) {
+      success.reasoning((String) reasoningRaw);
+    } else if (reasoningRaw != null) {
+      logger.warn("Judge {}: reasoning is not a string, ignoring", config.getKey());
+    }
+
+    return success.build();
+  }
+
+  /**
+   * Evaluates a message list and runner response, always running (sampling rate {@code 1.0}).
+   * <p>
+   * Messages are formatted as {@code role: content} lines, joined by newlines.
+   *
+   * @param messages the messages that were sent to the model
+   * @param response the runner result whose {@link RunnerResult#getContent() content} is evaluated
+   * @return the evaluation result; never {@code null}
+   */
+  public JudgeResult evaluateMessages(List<Message> messages, RunnerResult response) {
+    return evaluateMessages(messages, response, 1.0);
+  }
+
+  /**
+   * Evaluates a message list and runner response, subject to the given sampling rate.
+   * <p>
+   * Messages are formatted as {@code role: content} lines, joined by newlines. A {@code null}
+   * message list, a {@code null} response, or a response with {@code null} content is evaluated
+   * as an empty string.
+   *
+   * @param messages the messages that were sent to the model
+   * @param response the runner result whose {@link RunnerResult#getContent() content} is evaluated
+   * @param samplingRate the fraction of evaluations to actually run
+   * @return the evaluation result; never {@code null}
+   */
+  public JudgeResult evaluateMessages(List<Message> messages, RunnerResult response, double samplingRate) {
+    String formattedMessages = messages == null ? "" : messages.stream()
+        .map(m -> m.getRole().getWireValue() + ": " + m.getContent())
+        .collect(Collectors.joining("\n"));
+    // Null content is normalised to "" like a null response; otherwise string concatenation
+    // would put the literal text "null" into the prompt.
+    String content = response == null ? null : response.getContent();
+    return evaluate(formattedMessages, content == null ? "" : content, samplingRate);
+  }
+
+  /**
+   * Returns the judge AI Config this instance was constructed with.
+   *
+   * @return the judge config, never {@code null}
+   */
+  public AIJudgeConfig getConfig() {
+    return config;
+  }
+
+  /**
+   * Returns the runner this instance was constructed with.
+   *
+   * @return the runner, never {@code null}
+   */
+  public Runner getRunner() {
+    return runner;
+  }
+
+  /**
+   * Starts a result builder pre-populated with the fields every result from this judge shares:
+   * the sampled/success flags plus this judge's config key and evaluation metric key.
+   */
+  private JudgeResult.Builder resultBuilder(boolean sampled, boolean success) {
+    return JudgeResult.builder()
+        .sampled(sampled)
+        .success(success)
+        .judgeConfigKey(config.getKey())
+        .metricKey(config.getEvaluationMetricKey());
+  }
+}
Original file line number	Diff line number	Diff line change
`@@ -29,7 +29,7 @@ public final class AIJudgeConfig extends AIConfig {`
`29`	`29`	`List<Message> messages,`
`30`	`30`	`String evaluationMetricKey,`
`31`	`31`	`Supplier<LDAIConfigTracker> trackerFactory) {`
`32`		`- super(key, enabled, Mode.JUDGE, model, provider, trackerFactory);`
	`32`	`+ super(key, enabled, Mode.JUDGE, model, provider, trackerFactory, Evaluator.noop());`
`33`	`33`	`this.messages = messages == null ? null : Collections.unmodifiableList(messages);`
`34`	`34`	`this.evaluationMetricKey = evaluationMetricKey;`
`35`	`35`	`}`