Skip to content

Commit 523524d

Browse files
Update the scorer invoke schemas: nest query/response under `inputs` and drop `root_type`
1 parent 7b0a15d commit 523524d

4 files changed

Lines changed: 62 additions & 48 deletions

File tree

evaluators/contrib/galileo/src/agent_control_evaluator_galileo/luna/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
from agent_control_evaluator_galileo.luna.client import (
44
GalileoLunaClient,
5+
ScorerInvokeInputs,
56
ScorerInvokeRequest,
67
ScorerInvokeResponse,
78
)
@@ -10,6 +11,7 @@
1011

1112
__all__ = [
1213
"GalileoLunaClient",
14+
"ScorerInvokeInputs",
1315
"ScorerInvokeRequest",
1416
"ScorerInvokeResponse",
1517
"LunaEvaluatorConfig",

evaluators/contrib/galileo/src/agent_control_evaluator_galileo/luna/client.py

Lines changed: 18 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,6 @@
99
from hmac import new as hmac_new
1010
from json import dumps
1111
from time import time
12-
from typing import Literal
1312
from uuid import UUID
1413

1514
import httpx
@@ -66,32 +65,38 @@ def _as_float_or_none(value: JSONValue) -> float | None:
6665
return None
6766

6867

69-
RootType = Literal["session", "trace", "span"]
68+
def _has_value(value: JSONValue) -> bool:
69+
return value is not None and value != ""
70+
71+
72+
class ScorerInvokeInputs(BaseModel):
73+
"""Input values sent to Galileo's scorer invoke API."""
74+
75+
query: JSONValue = ""
76+
response: JSONValue = ""
77+
ground_truth: JSONValue = None
78+
tools: JSONValue = None
7079

7180

7281
class ScorerInvokeRequest(BaseModel):
7382
"""Request payload for Galileo Luna scorer invocation.
7483
7584
Attributes:
76-
root_type: Runtime step shape used by Galileo scorer input normalization.
77-
input: Optional user/system prompt text.
78-
output: Optional model response text.
85+
inputs: Selected scorer input values.
7986
scorer_label: Preset, registered, or fine-tuned scorer label.
8087
project_id: Optional Galileo project UUID for project-scoped scorer resolution.
8188
config: Optional scorer-specific configuration.
8289
"""
8390

84-
root_type: RootType = Field(default="span")
85-
input: JSONValue = None
86-
output: JSONValue = None
8791
scorer_label: str = Field(min_length=1)
92+
inputs: ScorerInvokeInputs
8893
project_id: str | UUID | None = None
8994
config: JSONObject | None = None
9095

9196
@model_validator(mode="after")
9297
def ensure_input_or_output(self) -> ScorerInvokeRequest:
93-
if self.input is None and self.output is None:
94-
raise ValueError("Either input or output must be set.")
98+
if not (_has_value(self.inputs.query) or _has_value(self.inputs.response)):
99+
raise ValueError("Either inputs.query or inputs.response must be set.")
95100
return self
96101

97102
def to_dict(self) -> JSONObject:
@@ -234,7 +239,6 @@ async def invoke(
234239
scorer_label: str,
235240
input: JSONValue = None,
236241
output: JSONValue = None,
237-
root_type: RootType = "span",
238242
project_id: str | UUID | None = None,
239243
config: JSONObject | None = None,
240244
timeout: float = DEFAULT_TIMEOUT_SECS,
@@ -246,7 +250,6 @@ async def invoke(
246250
scorer_label: Preset, registered, or fine-tuned scorer label.
247251
input: Optional user/system prompt text.
248252
output: Optional model response text.
249-
root_type: Runtime step shape used by Galileo scorer input normalization.
250253
project_id: Optional Galileo project UUID for project-scoped scorer resolution.
251254
config: Optional scorer-specific configuration.
252255
timeout: Request timeout in seconds.
@@ -266,9 +269,9 @@ async def invoke(
266269

267270
request_body = ScorerInvokeRequest(
268271
scorer_label=scorer_label,
269-
input=input,
270-
output=output,
271-
root_type=root_type,
272+
inputs=ScorerInvokeInputs(
273+
query="" if input is None else input, response="" if output is None else output
274+
),
272275
project_id=project_id,
273276
config=config,
274277
).to_dict()

evaluators/contrib/galileo/tests/test_luna_evaluator.py

Lines changed: 20 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -52,34 +52,38 @@ def test_numeric_operator_requires_numeric_threshold(self) -> None:
5252
class TestGalileoLunaClient:
5353
"""Tests for the GalileoLunaClient HTTP contract."""
5454

55-
def test_scorer_invoke_request_matches_orbit_schema_shape(self) -> None:
56-
from agent_control_evaluator_galileo.luna import ScorerInvokeRequest
55+
def test_scorer_invoke_request_matches_api_schema_shape(self) -> None:
56+
from agent_control_evaluator_galileo.luna import ScorerInvokeInputs, ScorerInvokeRequest
5757

5858
# Given: a scorer request with project context and scorer config
5959
request = ScorerInvokeRequest(
6060
scorer_label="toxicity",
61-
input={"messages": [{"role": "user", "content": "hello"}]},
61+
inputs=ScorerInvokeInputs(query={"messages": [{"role": "user", "content": "hello"}]}),
6262
project_id="12345678-1234-5678-1234-567812345678",
6363
config={"top_k": 1},
6464
)
6565

66-
# Then: the serialized payload uses the Orbit scorer invoke fields
66+
# Then: the serialized payload uses the API-owned scorer invoke fields
6767
assert request.to_dict() == {
68-
"root_type": "span",
69-
"input": {"messages": [{"role": "user", "content": "hello"}]},
7068
"scorer_label": "toxicity",
69+
"inputs": {
70+
"query": {"messages": [{"role": "user", "content": "hello"}]},
71+
"response": "",
72+
},
7173
"project_id": "12345678-1234-5678-1234-567812345678",
7274
"config": {"top_k": 1},
7375
}
7476

7577
def test_scorer_invoke_request_requires_input_or_output(self) -> None:
7678
from agent_control_evaluator_galileo.luna import ScorerInvokeRequest
7779

78-
# Given/When/Then: the request mirrors Orbit validation
79-
with pytest.raises(ValidationError, match="Either input or output must be set"):
80-
ScorerInvokeRequest(scorer_label="toxicity")
80+
# Given/When/Then: the request mirrors API validation
81+
with pytest.raises(
82+
ValidationError, match="Either inputs.query or inputs.response must be set"
83+
):
84+
ScorerInvokeRequest(scorer_label="toxicity", inputs={})
8185

82-
def test_scorer_invoke_response_matches_orbit_schema_shape(self) -> None:
86+
def test_scorer_invoke_response_matches_api_schema_shape(self) -> None:
8387
from agent_control_evaluator_galileo.luna import ScorerInvokeResponse
8488

8589
# Given: an API scorer invoke response
@@ -93,7 +97,7 @@ def test_scorer_invoke_response_matches_orbit_schema_shape(self) -> None:
9397
}
9498
)
9599

96-
# Then: the model exposes the Orbit/API response fields
100+
# Then: the model exposes the API response fields
97101
assert response.model_dump() == {
98102
"scorer_label": "toxicity",
99103
"score": 0.82,
@@ -187,11 +191,9 @@ def handler(request: httpx.Request) -> httpx.Response:
187191
assert response.score == 0.82
188192
assert captured["url"] == "https://api.demo-v2.galileocloud.io/scorers/invoke"
189193
assert captured["body"] == {
190-
"input": "user prompt",
191-
"output": "model answer",
192194
"scorer_label": "toxicity",
195+
"inputs": {"query": "user prompt", "response": "model answer"},
193196
"project_id": "12345678-1234-5678-1234-567812345678",
194-
"root_type": "span",
195197
"config": {"top_k": 1},
196198
}
197199
assert "stage_name" not in captured["body"]
@@ -237,12 +239,13 @@ def handler(request: httpx.Request) -> httpx.Response:
237239

238240
# Then: the internal scorer endpoint is called with a project-bound JWT
239241
assert response.score == 0.82
240-
assert captured["url"] == "https://api.default.svc.cluster.local:8088/internal/scorers/invoke"
242+
assert (
243+
captured["url"] == "https://api.default.svc.cluster.local:8088/internal/scorers/invoke"
244+
)
241245
assert captured["body"] == {
242-
"output": "model answer",
243246
"scorer_label": "toxicity",
247+
"inputs": {"query": "", "response": "model answer"},
244248
"project_id": "12345678-1234-5678-1234-567812345678",
245-
"root_type": "span",
246249
}
247250
headers = captured["headers"]
248251
assert isinstance(headers, dict)

sdks/python/src/agent_control/evaluators/__init__.py

Lines changed: 22 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -44,19 +44,23 @@
4444
LunaEvaluator,
4545
LunaEvaluatorConfig,
4646
LunaOperator,
47+
ScorerInvokeInputs,
4748
ScorerInvokeRequest,
4849
ScorerInvokeResponse,
4950
)
5051

51-
__all__.extend([
52-
"GalileoLunaClient",
53-
"ScorerInvokeRequest",
54-
"ScorerInvokeResponse",
55-
"LunaEvaluator",
56-
"LunaEvaluatorConfig",
57-
"LunaOperator",
58-
"LUNA_AVAILABLE",
59-
])
52+
__all__.extend(
53+
[
54+
"GalileoLunaClient",
55+
"ScorerInvokeInputs",
56+
"ScorerInvokeRequest",
57+
"ScorerInvokeResponse",
58+
"LunaEvaluator",
59+
"LunaEvaluatorConfig",
60+
"LunaOperator",
61+
"LUNA_AVAILABLE",
62+
]
63+
)
6064
except ImportError:
6165
pass
6266

@@ -69,12 +73,14 @@
6973
Luna2Operator,
7074
)
7175

72-
__all__.extend([
73-
"Luna2Evaluator",
74-
"Luna2EvaluatorConfig",
75-
"Luna2Metric",
76-
"Luna2Operator",
77-
"LUNA2_AVAILABLE",
78-
])
76+
__all__.extend(
77+
[
78+
"Luna2Evaluator",
79+
"Luna2EvaluatorConfig",
80+
"Luna2Metric",
81+
"Luna2Operator",
82+
"LUNA2_AVAILABLE",
83+
]
84+
)
7985
except ImportError:
8086
pass

0 commit comments

Comments
 (0)