Skip to content

Commit ab40f62

Browse files
feat: introduce guardrail step type
1 parent 0bb11aa commit ab40f62

3 files changed

Lines changed: 225 additions & 67 deletions

File tree

src/openlayer/lib/tracing/enums.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,3 +6,4 @@
66
@enum.unique
class StepType(enum.Enum):
    """Kinds of steps that can be recorded in a trace.

    Each member's value is the string identifier for that step type.
    ``@enum.unique`` makes a duplicated value fail at import time instead
    of silently turning the later member into an alias of the earlier one.
    """

    # Step type for a user call.
    USER_CALL = "user_call"
    # Step type for a chat completion.
    CHAT_COMPLETION = "chat_completion"
    # Step type for a guardrail check.
    GUARDRAIL = "guardrail"

src/openlayer/lib/tracing/steps.py

Lines changed: 35 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
import time
44
import uuid
5-
from typing import Any, Dict, Optional
5+
from typing import Any, Dict, Optional, List
66

77
from .. import utils
88
from . import enums
@@ -119,6 +119,39 @@ def to_dict(self) -> Dict[str, Any]:
119119
return step_dict
120120

121121

122+
class GuardrailStep(Step):
    """Step for tracking guardrail execution.

    Extends ``Step`` with guardrail-specific fields. All of them start as
    ``None``; the constructor does not accept them directly, so they are
    presumably filled in after construction (e.g. through the step's
    ``log(...)`` call) -- TODO confirm against ``Step.log``.
    """

    def __init__(self, **kwargs):
        """Create a guardrail step.

        Args:
            **kwargs: Forwarded unchanged to ``Step.__init__``.
        """
        super().__init__(**kwargs)
        # Set after the base initializer so this value is the one that sticks.
        self.step_type = enums.StepType.GUARDRAIL

        # Guardrail-specific fields, serialized by ``to_dict``.
        self.action: Optional[str] = None
        self.blocked_entities: Optional[List[str]] = None
        # Optional because it is unset (None) until a value is provided.
        self.confidence_threshold: Optional[float] = None
        self.reason: Optional[str] = None
        self.detected_entities: Optional[List[str]] = None
        self.redacted_entities: Optional[List[str]] = None
        self.block_strategy: Optional[str] = None
        self.data_type: Optional[str] = None

    def to_dict(self) -> Dict[str, Any]:
        """Dictionary representation of the GuardrailStep.

        Returns:
            The dictionary produced by ``Step.to_dict`` updated with the
            guardrail-specific fields under camelCase keys. Fields that
            were never set are included with a value of ``None``.
        """
        step_dict = super().to_dict()
        step_dict.update(
            {
                "action": self.action,
                "blockedEntities": self.blocked_entities,
                "confidenceThreshold": self.confidence_threshold,
                "reason": self.reason,
                "detectedEntities": self.detected_entities,
                "blockStrategy": self.block_strategy,
                "redactedEntities": self.redacted_entities,
                "dataType": self.data_type,
            }
        )
        return step_dict
153+
154+
122155
# ----------------------------- Factory function ----------------------------- #
123156
def step_factory(step_type: enums.StepType, *args, **kwargs) -> Step:
124157
"""Factory function to create a step based on the step_type."""
@@ -127,5 +160,6 @@ def step_factory(step_type: enums.StepType, *args, **kwargs) -> Step:
127160
step_type_mapping = {
128161
enums.StepType.USER_CALL: UserCallStep,
129162
enums.StepType.CHAT_COMPLETION: ChatCompletionStep,
163+
enums.StepType.GUARDRAIL: GuardrailStep,
130164
}
131165
return step_type_mapping[step_type](*args, **kwargs)

src/openlayer/lib/tracing/tracer.py

Lines changed: 189 additions & 66 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
from ...types.inference_pipelines.data_stream_params import ConfigLlmData
1616
from .. import utils
1717
from . import enums, steps, traces
18+
from ..guardrails.base import GuardrailResult, GuardrailAction
1819

1920
logger = logging.getLogger(__name__)
2021

@@ -1098,7 +1099,7 @@ def _apply_input_guardrails(
10981099
guardrails: List[Any],
10991100
inputs: Dict[str, Any],
11001101
) -> Tuple[Dict[str, Any], Dict[str, Any]]:
1101-
"""Apply guardrails to function inputs.
1102+
"""Apply guardrails to function inputs, creating guardrail steps.
11021103
11031104
Args:
11041105
guardrails: List of guardrail instances
@@ -1111,9 +1112,9 @@ def _apply_input_guardrails(
11111112
return inputs, {}
11121113

11131114
modified_inputs = inputs.copy()
1114-
guardrail_metadata = {}
1115+
overall_metadata = {}
11151116

1116-
for guardrail in guardrails:
1117+
for i, guardrail in enumerate(guardrails):
11171118
try:
11181119
# Import here to avoid circular imports
11191120
from ..guardrails.base import BaseGuardrail, GuardrailBlockedException
@@ -1125,50 +1126,112 @@ def _apply_input_guardrails(
11251126
if not guardrail.is_enabled():
11261127
continue
11271128

1128-
result = guardrail.check_input(modified_inputs)
1129+
# Create a guardrail step for this check
1130+
with create_step(
1131+
name=f"{guardrail.name} - Input",
1132+
step_type=enums.StepType.GUARDRAIL,
1133+
) as guardrail_step:
1134+
try:
1135+
# Apply the guardrail
1136+
result = guardrail.check_input(modified_inputs)
1137+
1138+
# Store guardrail metadata for main function step
1139+
guardrail_key = f"input_{guardrail.name.lower().replace(' ', '_')}"
1140+
overall_metadata[guardrail_key] = {
1141+
"action": result.action.value,
1142+
"reason": result.reason,
1143+
"metadata": result.metadata or {},
1144+
}
1145+
1146+
# Prepare step logging data
1147+
step_log_data = {
1148+
"action": result.action.value,
1149+
"reason": result.reason,
1150+
"data_type": "input",
1151+
"inputs": {"original_data": modified_inputs},
1152+
}
1153+
1154+
if result.action.value == "block":
1155+
# Handle the block according to strategy
1156+
final_inputs, block_metadata = _handle_guardrail_block(
1157+
guardrail=guardrail,
1158+
result=result,
1159+
modified_inputs=modified_inputs,
1160+
guardrail_metadata=overall_metadata,
1161+
guardrail_key=guardrail_key,
1162+
is_input=True,
1163+
)
11291164

1130-
# Store guardrail metadata
1131-
guardrail_key = f"input_{guardrail.name.lower().replace(' ', '_')}"
1132-
guardrail_metadata[guardrail_key] = {
1133-
"action": result.action.value,
1134-
"reason": result.reason,
1135-
"metadata": result.metadata or {},
1136-
}
1165+
# Add final output if different
1166+
if final_inputs != modified_inputs:
1167+
step_log_data["output"] = final_inputs
1168+
1169+
# Log once with all data
1170+
guardrail_step.log(**step_log_data)
1171+
return final_inputs, overall_metadata
1172+
1173+
elif (
1174+
result.action.value == "modify"
1175+
and result.modified_data is not None
1176+
):
1177+
step_log_data["output"] = result.modified_data
1178+
modified_inputs = result.modified_data
1179+
logger.debug("Guardrail %s modified inputs", guardrail.name)
1180+
1181+
else: # allow
1182+
step_log_data["output"] = modified_inputs
1183+
1184+
# Single log call with all data
1185+
guardrail_step.log(**step_log_data)
1186+
1187+
except Exception as e:
1188+
# Create error result for the guardrail step
1189+
error_result = GuardrailResult(
1190+
action=GuardrailAction.ALLOW, # Default to allow on error
1191+
reason=f"Guardrail error: {str(e)}",
1192+
metadata={"error": str(e), "error_type": type(e).__name__},
1193+
)
1194+
guardrail_step.log(
1195+
inputs={"original_data": modified_inputs},
1196+
output=modified_inputs,
1197+
)
11371198

1138-
if result.action.value == "block":
1139-
return _handle_guardrail_block(
1140-
guardrail=guardrail,
1141-
result=result,
1142-
modified_inputs=modified_inputs,
1143-
guardrail_metadata=guardrail_metadata,
1144-
guardrail_key=guardrail_key,
1145-
is_input=True,
1146-
)
1147-
elif result.action.value == "modify" and result.modified_data is not None:
1148-
modified_inputs = result.modified_data
1149-
logger.debug("Guardrail %s modified inputs", guardrail.name)
1199+
if hasattr(e, "guardrail_name"):
1200+
# Re-raise guardrail exceptions
1201+
raise
1202+
else:
1203+
# Log other exceptions but don't fail the trace
1204+
logger.error(
1205+
"Error applying input guardrail %s: %s", guardrail.name, e
1206+
)
1207+
guardrail_key = (
1208+
f"input_{guardrail.name.lower().replace(' ', '_')}"
1209+
)
1210+
overall_metadata[guardrail_key] = {
1211+
"action": "error",
1212+
"reason": str(e),
1213+
"metadata": {"error_type": type(e).__name__},
1214+
"guardrail_name": guardrail.name,
1215+
}
11501216

11511217
except Exception as e:
1218+
# Handle exceptions that occur outside the guardrail step context
11521219
if hasattr(e, "guardrail_name"):
1153-
# Re-raise guardrail exceptions
11541220
raise
11551221
else:
1156-
# Log other exceptions but don't fail the trace
1157-
logger.error("Error applying input guardrail %s: %s", guardrail.name, e)
1158-
guardrail_key = f"input_{guardrail.name.lower().replace(' ', '_')}"
1159-
guardrail_metadata[guardrail_key] = {
1160-
"action": "error",
1161-
"reason": str(e),
1162-
"metadata": {},
1163-
}
1222+
logger.error(
1223+
"Error setting up input guardrail %s: %s",
1224+
getattr(guardrail, "name", f"guardrail_{i}"),
1225+
e,
1226+
)
11641227

1165-
return modified_inputs, guardrail_metadata
1228+
return modified_inputs, overall_metadata
11661229

11671230

11681231
def _apply_output_guardrails(
11691232
guardrails: List[Any], output: Any, inputs: Dict[str, Any]
11701233
) -> Tuple[Any, Dict[str, Any]]:
1171-
"""Apply guardrails to function output.
1234+
"""Apply guardrails to function output, creating guardrail steps.
11721235
11731236
Args:
11741237
guardrails: List of guardrail instances
@@ -1182,9 +1245,9 @@ def _apply_output_guardrails(
11821245
return output, {}
11831246

11841247
modified_output = output
1185-
guardrail_metadata = {}
1248+
overall_metadata = {}
11861249

1187-
for guardrail in guardrails:
1250+
for i, guardrail in enumerate(guardrails):
11881251
try:
11891252
# Import here to avoid circular imports
11901253
from ..guardrails.base import BaseGuardrail, GuardrailBlockedException
@@ -1196,46 +1259,106 @@ def _apply_output_guardrails(
11961259
if not guardrail.is_enabled():
11971260
continue
11981261

1199-
result = guardrail.check_output(modified_output, inputs)
1262+
# Create a guardrail step for this check
1263+
with create_step(
1264+
name=f"{guardrail.name} - Output",
1265+
step_type=enums.StepType.GUARDRAIL,
1266+
) as guardrail_step:
1267+
try:
1268+
# Apply the guardrail
1269+
result = guardrail.check_output(modified_output, inputs)
1270+
1271+
# Store guardrail metadata for main function step
1272+
guardrail_key = f"output_{guardrail.name.lower().replace(' ', '_')}"
1273+
overall_metadata[guardrail_key] = {
1274+
"action": result.action.value,
1275+
"reason": result.reason,
1276+
"metadata": result.metadata or {},
1277+
}
1278+
1279+
# Prepare step logging data
1280+
step_log_data = {
1281+
"action": result.action.value,
1282+
"reason": result.reason,
1283+
"data_type": "output",
1284+
"inputs": {"original_data": modified_output},
1285+
}
1286+
1287+
if result.action.value == "block":
1288+
# Handle the block according to strategy
1289+
final_output, block_metadata = _handle_guardrail_block(
1290+
guardrail=guardrail,
1291+
result=result,
1292+
modified_output=modified_output,
1293+
guardrail_metadata=overall_metadata,
1294+
guardrail_key=guardrail_key,
1295+
is_input=False,
1296+
)
12001297

1201-
# Store guardrail metadata
1202-
guardrail_key = f"output_{guardrail.name.lower().replace(' ', '_')}"
1203-
guardrail_metadata[guardrail_key] = {
1204-
"action": result.action.value,
1205-
"reason": result.reason,
1206-
"metadata": result.metadata or {},
1207-
}
1298+
# Add final output if different
1299+
if final_output != modified_output:
1300+
step_log_data["output"] = final_output
1301+
1302+
# Log once with all data
1303+
guardrail_step.log(**step_log_data)
1304+
return final_output, overall_metadata
1305+
1306+
elif (
1307+
result.action.value == "modify"
1308+
and result.modified_data is not None
1309+
):
1310+
step_log_data["output"] = result.modified_data
1311+
modified_output = result.modified_data
1312+
logger.debug("Guardrail %s modified output", guardrail.name)
1313+
1314+
else: # allow
1315+
step_log_data["output"] = modified_output
1316+
1317+
# Single log call with all data
1318+
guardrail_step.log(**step_log_data)
1319+
1320+
except Exception as e:
1321+
# Create error result for the guardrail step
1322+
error_result = GuardrailResult(
1323+
action=GuardrailAction.ALLOW, # Default to allow on error
1324+
reason=f"Guardrail error: {str(e)}",
1325+
metadata={"error": str(e), "error_type": type(e).__name__},
1326+
)
1327+
guardrail_step.log(
1328+
inputs={"original_data": modified_output},
1329+
output=modified_output,
1330+
)
12081331

1209-
if result.action.value == "block":
1210-
return _handle_guardrail_block(
1211-
guardrail=guardrail,
1212-
result=result,
1213-
modified_output=modified_output,
1214-
guardrail_metadata=guardrail_metadata,
1215-
guardrail_key=guardrail_key,
1216-
is_input=False,
1217-
)
1218-
elif result.action.value == "modify" and result.modified_data is not None:
1219-
modified_output = result.modified_data
1220-
logger.debug("Guardrail %s modified output", guardrail.name)
1332+
if hasattr(e, "guardrail_name"):
1333+
# Re-raise guardrail exceptions
1334+
raise
1335+
else:
1336+
# Log other exceptions but don't fail the trace
1337+
logger.error(
1338+
"Error applying output guardrail %s: %s", guardrail.name, e
1339+
)
1340+
guardrail_key = (
1341+
f"output_{guardrail.name.lower().replace(' ', '_')}"
1342+
)
1343+
overall_metadata[guardrail_key] = {
1344+
"action": "error",
1345+
"reason": str(e),
1346+
"metadata": {"error_type": type(e).__name__},
1347+
}
1348+
guardrail_step.log(**overall_metadata[guardrail_key])
12211349

12221350
except Exception as e:
1351+
# Handle exceptions that occur outside the guardrail step context
12231352
if hasattr(e, "guardrail_name"):
1224-
# Re-raise guardrail exceptions
12251353
raise
12261354
else:
1227-
# Log other exceptions but don't fail the trace
12281355
logger.error(
1229-
"Error applying output guardrail %s: %s", guardrail.name, e
1356+
"Error setting up output guardrail %s: %s",
1357+
getattr(guardrail, "name", f"guardrail_{i}"),
1358+
e,
12301359
)
1231-
guardrail_key = f"output_{guardrail.name.lower().replace(' ', '_')}"
1232-
guardrail_metadata[guardrail_key] = {
1233-
"action": "error",
1234-
"reason": str(e),
1235-
"metadata": {},
1236-
}
12371360

1238-
return modified_output, guardrail_metadata
1361+
return modified_output, overall_metadata
12391362

12401363

12411364
def _handle_guardrail_block(

0 commit comments

Comments
 (0)