@@ -172,6 +172,20 @@ def _get_numeric_target_value(self) -> float | int | str | None:
172172 async def _evaluate_local_stage (self , data : Any ) -> EvaluatorResult :
173173 """Evaluate using a local stage (runtime rulesets).
174174
175+ We use PASSTHROUGH action so Protect computes the metric and returns
176+ metric_results without making a block decision itself — agent-control
177+ owns that decision via the control's action.decision field.
178+
179+ Numeric operators (gt, lt, gte, lte, eq): Protect evaluates the rule
180+ server-side and returns status="triggered" when the condition is met,
181+ so _parse_response picks it up directly.
182+
183+ Categorical operators (not_empty, any): the Protect local-stage rule
184+ engine does not support these operators and always returns
185+ status="not_triggered", even when the metric value is non-empty.
186+ _parse_response falls back to _evaluate_metric_results which evaluates
187+ the condition client-side from the raw metric_results dict.
188+
175189 Args:
176190 data: The data to evaluate.
177191
@@ -187,7 +201,6 @@ async def _evaluate_local_stage(self, data: Any) -> EvaluatorResult:
187201 target_value = self ._get_numeric_target_value () or 0 ,
188202 )
189203
190- # Create proper Ruleset with PassthroughAction
191204 ruleset = Ruleset (
192205 rules = [rule ],
193206 action = PassthroughAction (type = "PASSTHROUGH" ),
@@ -204,6 +217,7 @@ async def _evaluate_local_stage(self, data: Any) -> EvaluatorResult:
204217 payload = payload ,
205218 prioritized_rulesets = [ruleset ],
206219 project_name = self .config .galileo_project ,
220+ stage_name = self .config .stage_name ,
207221 timeout = self .get_timeout_seconds (),
208222 metadata = self .config .metadata or {},
209223 )
@@ -279,10 +293,20 @@ def _prepare_payload(self, data: Any) -> Payload:
279293 is_output_metric = "output" in metric
280294
281295 if is_output_metric :
282- return Payload (input = "" , output = data_str )
296+ payload = Payload (input = "" , output = data_str )
283297 else :
284298 # Default to input for central stages or input metrics
285- return Payload (input = data_str , output = "" )
299+ payload = Payload (input = data_str , output = "" )
300+
301+ logger .debug (
302+ "[Luna2] _prepare_payload: metric=%r payload_field_config=%r "
303+ "→ input=%d chars, output=%d chars" ,
304+ self .config .metric ,
305+ self .config .payload_field ,
306+ len (payload .input ),
307+ len (payload .output ),
308+ )
309+ return payload
286310
287311 def _parse_response (self , response : ProtectResponse | None ) -> EvaluatorResult :
288312 """Parse Galileo Protect response into EvaluatorResult.
@@ -304,16 +328,34 @@ def _parse_response(self, response: ProtectResponse | None) -> EvaluatorResult:
304328 status = response .status .lower () if response .status else "unknown"
305329 triggered = status == "triggered"
306330
331+ # Numeric operators (gt/lt/etc.) are evaluated server-side by Protect and
332+ # return status="triggered" correctly even with PASSTHROUGH.
333+ # Categorical operators (not_empty, any) are NOT supported by Protect's
334+ # local-stage rule engine — it always returns status="not_triggered" for
335+ # them regardless of the metric value. Fall back to client-side evaluation
336+ # from metric_results for those cases.
337+ if not triggered and response .metric_results :
338+ triggered = self ._evaluate_metric_results (response .metric_results )
339+
340+ logger .info (
341+ "[Luna2] response: status=%r triggered=%s metric_results=%s" ,
342+ status ,
343+ triggered ,
344+ response .metric_results ,
345+ )
346+
307347 # Extract trace metadata
308348 trace_id = response .trace_metadata .id if response .trace_metadata else None
309349 execution_time = response .trace_metadata .execution_time if response .trace_metadata else None
310350 received_at = response .trace_metadata .received_at if response .trace_metadata else None
311351 response_at = response .trace_metadata .response_at if response .trace_metadata else None
312352
353+ message = self ._build_message (triggered , status , response .metric_results )
354+
313355 return EvaluatorResult (
314356 matched = triggered ,
315357 confidence = 1.0 if triggered else 0.0 ,
316- message = response . text or f"Luna-2 check: { status } " ,
358+ message = message ,
317359 metadata = {
318360 "status" : status ,
319361 "metric" : self .config .metric or "unknown" ,
@@ -324,6 +366,74 @@ def _parse_response(self, response: ProtectResponse | None) -> EvaluatorResult:
324366 },
325367 )
326368
369+ def _build_message (self , triggered : bool , status : str , metric_results : dict ) -> str :
370+ """Build a human-readable message from the evaluation result."""
371+ metric = self .config .metric or "unknown"
372+
373+ if not triggered :
374+ return f"Luna-2 { metric } check passed"
375+
376+ result = (metric_results or {}).get (metric , {})
377+ value = result .get ("value" )
378+
379+ if isinstance (value , list ) and value :
380+ categories = ", " .join (str (v ).replace ("_" , " " ) for v in value )
381+ return f"{ metric } detected: { categories } "
382+ if isinstance (value , (int , float )):
383+ return f"{ metric } score { value :.2f} exceeded threshold"
384+
385+ return f"Luna-2 { metric } check triggered"
386+
def _evaluate_metric_results(self, metric_results: dict) -> bool:
    """Evaluate the configured operator/target against raw metric_results.

    Used as a client-side fallback when the Protect response did not report
    a server-side trigger but still carries metric values, so the rule
    condition can be checked locally.

    Args:
        metric_results: The metric_results dict from the Protect API
            response, keyed by metric name. Each entry is expected to
            expose "status" and "value" keys.

    Returns:
        True if the rule condition is satisfied. False when the metric is
        missing, its entry is malformed or not in "SUCCESS" status, the
        operator is unknown, or the value and target cannot be compared.
    """
    metric = self.config.metric
    if not metric or not metric_results or metric not in metric_results:
        return False

    result = metric_results[metric]
    # An entry without a mapping interface (e.g. None) cannot be inspected.
    if not hasattr(result, "get") or result.get("status") != "SUCCESS":
        return False

    value = result.get("value")
    operator = self.config.operator
    target = self.config.target_value

    if operator in ("not_empty", "not_null"):
        return bool(value)
    if operator in ("empty", "is_null"):
        return not value
    if operator == "any" and isinstance(value, list):
        return target in value if target is not None else bool(value)
    if operator == "contains":
        if not value or target is None:
            return False
        try:
            return target in value
        except TypeError:
            # The value is not a container (e.g. a numeric score), or it is
            # a string and the target is not; treat as "does not contain".
            return False

    if isinstance(value, (int, float)) and target is not None:
        # Only the conversion can fail; an unparsable target means the
        # numeric condition cannot be satisfied.
        try:
            threshold = float(target)
        except (TypeError, ValueError):
            return False
        if operator == "gt":
            return value > threshold
        if operator == "gte":
            return value >= threshold
        if operator == "lt":
            return value < threshold
        if operator == "lte":
            return value <= threshold
        if operator == "eq":
            return value == threshold

    return False
436+
327437 def _handle_error (self , error : Exception ) -> EvaluatorResult :
328438 """Handle errors from Luna-2 evaluation.
329439
0 commit comments