Skip to content

Commit 39ccda7

Browse files
authored
chore: fix sampled field semantics on JudgeResult (#137)
1 parent 20fff24 commit 39ccda7

4 files changed

Lines changed: 12 additions & 7 deletions

File tree

packages/sdk/server-ai/src/ldai/judge/__init__.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ async def evaluate(
5151
:param input_text: The input prompt or question that was provided to the AI
5252
:param output_text: The AI-generated response to be evaluated
5353
:param sampling_rate: Sampling rate (0-1) to determine if evaluation should be processed (defaults to 1)
54-
:return: Evaluation result; ``sampled=True`` when skipped due to sampling rate
54+
:return: The result of the judge evaluation.
5555
"""
5656
judge_result = JudgeResult(judge_config_key=self._ai_config.key)
5757

@@ -70,9 +70,9 @@ async def evaluate(
7070

7171
if random.random() > sampling_rate:
7272
log.debug(f'Judge evaluation skipped due to sampling rate: {sampling_rate}')
73-
judge_result.sampled = True
7473
return judge_result
7574

75+
judge_result.sampled = True
7676
messages = self._construct_evaluation_messages(input_text, output_text)
7777
assert self._evaluation_response_structure is not None
7878

@@ -110,7 +110,7 @@ async def evaluate_messages(
110110
:param messages: Array of messages representing the conversation history
111111
:param response: The AI response to be evaluated
112112
:param sampling_ratio: Sampling ratio (0-1) to determine if evaluation should be processed (defaults to 1)
113-
:return: Evaluation result; ``sampled=True`` when skipped due to sampling rate
113+
:return: The result of the judge evaluation.
114114
"""
115115
input_text = '\r\n'.join([msg.content for msg in messages]) if messages else ''
116116
output_text = response.message.content

packages/sdk/server-ai/src/ldai/providers/types.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -65,10 +65,10 @@ class JudgeResult:
6565
judge_config_key: Optional[str] = None
6666
success: bool = False
6767
error_message: Optional[str] = None
68-
sampled: bool = False # True when the judge was skipped due to sampling rate
68+
sampled: bool = False # True when the evaluation was sampled and run
69+
metric_key: Optional[str] = None
6970
score: Optional[float] = None
7071
reasoning: Optional[str] = None
71-
metric_key: Optional[str] = None
7272

7373
def to_dict(self) -> Dict[str, Any]:
7474
"""

packages/sdk/server-ai/src/ldai/tracker.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -241,6 +241,9 @@ def track_judge_result(self, judge_result: Any) -> None:
241241
242242
:param judge_result: JudgeResult object containing score, metric key, and success status
243243
"""
244+
if not judge_result.sampled:
245+
return
246+
244247
if judge_result.success and judge_result.metric_key:
245248
track_data = self.__get_track_data()
246249
if judge_result.judge_config_key:

packages/sdk/server-ai/tests/test_judge.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -168,6 +168,7 @@ async def test_evaluate_success_with_valid_response(
168168

169169
assert isinstance(result, JudgeResult)
170170
assert result.success is True
171+
assert result.sampled is True
171172
assert result.metric_key == '$ld:ai:judge:relevance'
172173
assert result.score == 0.85
173174
assert result.reasoning is not None
@@ -194,6 +195,7 @@ async def test_evaluate_success_with_evaluation_response_shape(
194195

195196
assert isinstance(result, JudgeResult)
196197
assert result.success is True
198+
assert result.sampled is True
197199
assert result.metric_key == '$ld:ai:judge:relevance'
198200
assert result.score == 0.9
199201
assert result.reasoning is not None
@@ -288,13 +290,13 @@ async def test_evaluate_handles_exception(
288290
async def test_evaluate_respects_sampling_rate(
289291
self, judge_config_with_key: AIJudgeConfig, tracker: LDAIConfigTracker, mock_runner
290292
):
291-
"""Evaluate should return sampled=True when skipped due to sampling rate."""
293+
"""Evaluate should return sampled=False when skipped due to sampling rate."""
292294
judge = Judge(judge_config_with_key, tracker, mock_runner)
293295

294296
result = await judge.evaluate("input", "output", sampling_rate=0.0)
295297

296298
assert isinstance(result, JudgeResult)
297-
assert result.sampled is True
299+
assert result.sampled is False
298300
assert result.success is False
299301
mock_runner.invoke_structured_model.assert_not_called()
300302

0 commit comments

Comments
 (0)