Skip to content

Commit 7fd93c0

Browse files
authored
feat(guardrails): Add Guardrails (#946)
1 parent d44ce24 commit 7fd93c0

24 files changed

Lines changed: 4310 additions & 549 deletions

packages/ai-semantic-conventions/src/SemanticAttributes.ts

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,21 @@
1616
// TODO: Remove unnecessary comments once all instrumentations are updated (before the parent branch merge)
1717

1818
export const SpanAttributes = {
19+
// Guardrail attributes (custom — not yet in OTel core spec)
20+
21+
// Parent guardrail span (gen_ai.operation.name = "guardrail.run")
22+
GEN_AI_GUARDRAIL_NAME: "gen_ai.guardrail.name",
23+
GEN_AI_GUARDRAIL_STATUS: "gen_ai.guardrail.status",
24+
GEN_AI_GUARDRAIL_DURATION: "gen_ai.guardrail.duration",
25+
GEN_AI_GUARDRAIL_GUARD_COUNT: "gen_ai.guardrail.guard_count",
26+
GEN_AI_GUARDRAIL_FAILED_GUARD_COUNT: "gen_ai.guardrail.failed_guard_count",
27+
28+
// Child guard span (gen_ai.operation.name = "guard")
29+
GEN_AI_GUARDRAIL_INPUT: "gen_ai.guardrail.input",
30+
GEN_AI_GUARDRAIL_OUTPUT: "gen_ai.guardrail.output",
31+
GEN_AI_GUARDRAIL_ERROR_TYPE: "gen_ai.guardrail.error.type",
32+
GEN_AI_GUARDRAIL_ERROR_MESSAGE: "gen_ai.guardrail.error.message",
33+
1934
// Attributes not yet in @opentelemetry/semantic-conventions
2035

2136
// TODO: Remove after all instrumentations are updated
Lines changed: 148 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,148 @@
1+
/**
2+
* Custom Evaluator Guard Example
3+
* ================================
4+
* Shows how to use a custom LLM-as-a-judge evaluator as a guard.
5+
*
6+
* Scenario: a physics education app that only answers physics questions.
7+
* The LLM response is checked with the custom evaluator guard to confirm
8+
* it actually contains physics content. Off-topic responses are blocked
9+
* and replaced with a fallback message.
10+
*
11+
* The custom evaluator uses a two-call HTTP flow:
12+
* 1. POST /v2/evaluators/{slug}/executions — trigger the LLM judge
13+
* 2. GET /v2{streamUrl} — blocking long-poll for result
14+
*
15+
* Prerequisites:
16+
* - A custom evaluator with slug "custom-test" must exist on your backend.
17+
* The evaluator should return { isValid: boolean, reasoning: string }.
18+
* Its prompt template should use {llm_response} as the input variable.
19+
*
20+
* Run:
21+
* npm run build && node dist/src/guardrails/custom_evaluator.js
22+
*
23+
* Environment:
24+
* OPENAI_API_KEY — OpenAI key
25+
* TRACELOOP_API_KEY — your Traceloop API key
26+
* TRACELOOP_BASE_URL — https://api.traceloop.dev
27+
*/
28+
29+
// ── Init — Traceloop FIRST ───────────────────────────────────────────────────
30+
import * as traceloop from "@traceloop/node-server-sdk";
31+
import OpenAI from "openai";
32+
33+
// Configure the Traceloop SDK for this example. The API key and base URL are
// read from the environment, and the OpenAI module is handed over explicitly
// under `instrumentModules`.
traceloop.initialize({
  appName: "guardrails-custom-evaluator-example",
  baseUrl: process.env.TRACELOOP_BASE_URL,
  apiKey: process.env.TRACELOOP_API_KEY,
  silenceInitializationMessage: true,
  disableBatch: true,
  instrumentModules: {
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    openAI: OpenAI as any,
  },
});
44+
45+
import {
46+
Guardrails,
47+
customEvaluatorGuard,
48+
isTrue,
49+
} from "@traceloop/node-server-sdk";
50+
51+
// OpenAI client built with no explicit options.
const openai = new OpenAI();

// Slug of the custom evaluator on the Traceloop backend. Per the example's
// prerequisites it takes {llm_response} and returns { isValid: boolean }.
const EVALUATOR_SLUG = "custom-test";

// Message handed to the guardrail as its `onFailure` value.
const FALLBACK =
  "I can only help with physics questions. Please ask something related to physics.";
59+
60+
// ── Helpers ───────────────────────────────────────────────────────────────────
61+
62+
/**
 * Sends one user question to the OpenAI chat completions API under a
 * physics-tutor system prompt.
 *
 * @param question - Text forwarded as the user message.
 * @returns The content of the first choice, or an empty string when the
 *          choice, its message, or its content is missing.
 */
async function askLLM(question: string): Promise<string> {
  const completion = await openai.chat.completions.create({
    model: "gpt-4o-mini",
    max_tokens: 200,
    messages: [
      {
        role: "system",
        content:
          "You are a physics tutor. Answer questions about physics concisely.",
      },
      { role: "user", content: question },
    ],
  });

  const firstChoice = completion.choices[0];
  return firstChoice?.message?.content ?? "";
}
77+
78+
// ── Guard setup ───────────────────────────────────────────────────────────────
79+
80+
// Guard backed by the custom evaluator identified by EVALUATOR_SLUG.
// The "isValid" field of the evaluator output is checked with isTrue(),
// and the evaluator call is given a 60-second timeout.
const physicsContentGuard = customEvaluatorGuard(EVALUATOR_SLUG, {
  timeoutMs: 60 * 1000,
  conditionField: "isValid",
  condition: isTrue(),
});

// Guardrails instance wrapping the physics guard.
//  - onFailure: FALLBACK is supplied as the failure value.
//  - inputMapper: the evaluator template uses {llm_response}, so the guarded
//    output is passed to the guard under that key.
const responseGuard = new Guardrails([physicsContentGuard], {
  inputMapper: (llmOutput) => [{ llm_response: llmOutput as string }],
  onFailure: FALLBACK,
});
94+
95+
// ── Main ──────────────────────────────────────────────────────────────────────
96+
97+
/**
 * Shortens text for log output, appending "..." only when characters were
 * actually cut off.
 */
function truncateForLog(text: string, limit = 100): string {
  return text.length > limit ? `${text.slice(0, limit)}...` : text;
}

/**
 * Runs both demo scenarios inside one Traceloop workflow:
 *  1. a physics question whose LLM answer is passed through `responseGuard`;
 *  2. an off-topic question whose LLM answer is passed through the same guard.
 *
 * `traceloop.forceFlush()` is awaited in a `finally` block so it still runs
 * when the workflow rejects (for example if the LLM call or the guard throws).
 * The original error then propagates to the caller.
 */
async function main(): Promise<void> {
  try {
    await traceloop.withWorkflow(
      { name: "custom-evaluator-example" },
      async () => {
        console.log(
          "\n── guard() on response — physics question → LLM → evaluator ──",
        );
        // A real physics question: LLM answers, custom evaluator checks it.

        const physicsQuestion =
          "How does Newton's second law relate force and mass?";
        console.log(`Question: "${physicsQuestion}"`);

        const llmResponse = await askLLM(physicsQuestion);
        console.log(`LLM response: "${truncateForLog(llmResponse)}"`);

        // Time only the guard run, not the LLM call above.
        const start = Date.now();
        const physicsResult = await responseGuard.run(async () => llmResponse);
        const duration = Date.now() - start;

        console.log(
          `Custom evaluator result: ${physicsResult !== FALLBACK ? "✅ physics confirmed" : "🚫 not physics"} (${duration}ms)`,
        );

        console.log(
          "\n── off-topic question → LLM → evaluator blocks response ──",
        );
        // An off-topic question: the LLM answer is expected to be blocked.

        const offTopicQuestion = "What is the best way to cook pasta?";
        console.log(`Question: "${offTopicQuestion}"`);

        const offTopicResponse = await askLLM(offTopicQuestion);
        console.log(`LLM response: "${truncateForLog(offTopicResponse)}"`);

        // run() is configured with onFailure: FALLBACK, so a blocked response
        // is expected to come back as the fallback text.
        const finalResponse = await responseGuard.run(
          async () => offTopicResponse,
        );
        console.log(`Response shown to user: "${finalResponse}"`);

        console.log(
          "\n── done ───────────────────────────────────────────────────",
        );
        console.log("Check the Traceloop UI for span details.\n");
      },
    );
  } finally {
    // Runs on success and on failure alike.
    await traceloop.forceFlush();
  }
}
147+
148+
// Handle rejection explicitly instead of leaving a floating promise: log the
// error and mark the process as failed.
main().catch((err: unknown) => {
  console.error(err);
  process.exitCode = 1;
});
Lines changed: 201 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,201 @@
1+
/**
2+
* Guard Error Handling Example
3+
* =============================
4+
* Demonstrates that real guard errors (network timeout, HTTP failure, code bug)
5+
* propagate as GuardExecutionError — they are NOT silently treated as logical
6+
* failures. This matches Python SDK behavior.
7+
*
8+
* Each example verifies:
9+
* - GuardExecutionError is thrown to the caller
10+
* - The guard span gets ERROR OTel status + error.type/error.message attributes
11+
*
12+
* Check the Traceloop UI after running — look for spans with red ERROR status.
13+
*
14+
* Run:
15+
* npm run build && node dist/src/guardrails/error_handling.js
16+
*
17+
* Environment:
18+
* TRACELOOP_API_KEY — your Traceloop API key
19+
* TRACELOOP_BASE_URL — https://api.traceloop.dev
20+
*/
21+
22+
// ── Init — Traceloop FIRST ───────────────────────────────────────────────────
23+
import * as traceloop from "@traceloop/node-server-sdk";
24+
25+
// Configure the Traceloop SDK for this example. The API key and base URL are
// read from the environment.
traceloop.initialize({
  appName: "guardrails-error-handling-example",
  baseUrl: process.env.TRACELOOP_BASE_URL,
  apiKey: process.env.TRACELOOP_API_KEY,
  silenceInitializationMessage: true,
  disableBatch: true,
});
32+
33+
import {
34+
Guardrails,
35+
validateContent,
36+
GuardExecutionError,
37+
} from "@traceloop/node-server-sdk";
38+
import type { Guard } from "@traceloop/node-server-sdk";
39+
40+
// ── Helpers ───────────────────────────────────────────────────────────────────
41+
42+
/**
 * Prints a section banner to stdout: a blank line, a 60-character rule,
 * the title, and a closing rule.
 */
function sep(title: string) {
  const rule = "─".repeat(60);
  console.log(`\n${rule}`);
  console.log(` ${title}`);
  console.log(rule);
}
47+
48+
// Builds a guard named "error-guard" that rejects with `new Error(message)` on
// every invocation (simulates timeout / network failure).
function makeErrorGuard(message: string): Guard {
  const guard: Guard = (_input) => Promise.reject(new Error(message));
  guard.guardName = "error-guard";
  return guard;
}
56+
57+
// Guard named "always-pass" that resolves to true for any input. It is paired
// with error guards in the multi-guard example.
const alwaysPass: Guard = Object.assign(
  (_input: Record<string, unknown>) => Promise.resolve(true),
  { guardName: "always-pass" },
);
62+
63+
// ── Example 1: validateContent() propagates GuardExecutionError ─────────────────────
64+
65+
/**
 * Example 1: calls validateContent() with a single always-throwing guard and
 * reports whether the failure surfaced as a GuardExecutionError.
 *
 * Nothing is rethrown; every outcome is reported via console.log.
 */
async function example1_validateThrows(): Promise<void> {
  sep("EXAMPLE 1 — validateContent() throws GuardExecutionError on real error");

  console.log(
    " Running validateContent() with a guard that throws a network error...",
  );

  try {
    const guards = [makeErrorGuard("Simulated network timeout")];
    await validateContent("some LLM output", guards);
    // Reaching this line means the guard error was swallowed.
    console.log(" ❌ ERROR: validateContent() should have thrown but didn't");
  } catch (err) {
    // Anything other than GuardExecutionError is the wrong outcome.
    if (!(err instanceof GuardExecutionError)) {
      console.log(" ❌ Wrong error type thrown:", err);
      return;
    }

    console.log(" ✅ GuardExecutionError thrown as expected");
    console.log(` .message: "${err.message}"`);
    console.log(
      ` .originalException: "${err.originalException.message}"`,
    );
    console.log(` .guardIndex: ${err.guardIndex}`);
    console.log(
      " ℹ️ Check Traceloop UI: error-guard.guard span → ERROR status,",
    );
    console.log(" gen_ai.guardrail.error.type = Error,");
    console.log(
      " gen_ai.guardrail.error.message = Simulated network timeout",
    );
  }
}
97+
98+
// ── Example 2: run() propagates GuardExecutionError ──────────────────────────
99+
100+
/**
 * Example 2: runs a Guardrails instance (onFailure: "log") whose only guard
 * always throws, and reports whether run() rejected with GuardExecutionError.
 *
 * Nothing is rethrown; every outcome is reported via console.log.
 */
async function example2_runThrows(): Promise<void> {
  sep("EXAMPLE 2 — run() throws GuardExecutionError on real error");

  console.log(
    " Running Guardrails.run() with a guard that throws an HTTP error...",
  );

  const failingGuard = makeErrorGuard("HTTP 503: Service Unavailable");
  const guardrails = new Guardrails([failingGuard], { onFailure: "log" });

  try {
    await guardrails.run(async () => "LLM response text");
    // Reaching this line means the guard error was swallowed.
    console.log(" ❌ ERROR: run() should have thrown but didn't");
  } catch (err) {
    // Anything other than GuardExecutionError is the wrong outcome.
    if (!(err instanceof GuardExecutionError)) {
      console.log(" ❌ Wrong error type thrown:", err);
      return;
    }

    console.log(
      " ✅ GuardExecutionError thrown — onFailure='log' was NOT called",
    );
    console.log(` .message: "${err.message}"`);
    console.log(
      ` .originalException: "${err.originalException.message}"`,
    );
    console.log(
      " ℹ️ Check Traceloop UI: error-guard.guard span → ERROR status",
    );
  }
}
131+
132+
// ── Example 3: parallel().runAll() — one guard errors, one passes ─────────────
133+
134+
/**
 * Example 3: runs two guards (one passing, one always throwing) through
 * Guardrails configured with .parallel().runAll(), and reports whether run()
 * rejected with GuardExecutionError.
 *
 * Nothing is rethrown; every outcome is reported via console.log.
 */
async function example3_parallelRunAllThrows(): Promise<void> {
  sep(
    "EXAMPLE 3 — parallel().runAll() propagates error even when another guard passes",
  );

  console.log(
    " Running 2 guards in parallel (runAll): alwaysPass + errorGuard...",
  );

  const guards = [alwaysPass, makeErrorGuard("Evaluator API returned 500")];
  const guardrails = new Guardrails(guards, {}).parallel().runAll();

  try {
    await guardrails.run(async () => "LLM response text");
    // Reaching this line means the guard error was swallowed.
    console.log(" ❌ ERROR: run() should have thrown but didn't");
  } catch (err) {
    // Anything other than GuardExecutionError is the wrong outcome.
    if (!(err instanceof GuardExecutionError)) {
      console.log(" ❌ Wrong error type thrown:", err);
      return;
    }

    console.log(
      " ✅ GuardExecutionError propagated from parallel().runAll()",
    );
    console.log(` .message: "${err.message}"`);
    console.log(
      ` .originalException: "${err.originalException.message}"`,
    );
    console.log(` .guardIndex: ${err.guardIndex}`);
    console.log(" ℹ️ Check Traceloop UI: always-pass.guard → PASSED,");
    console.log(" error-guard.guard → ERROR with full exception event");
  }
}
170+
171+
// ── Main ──────────────────────────────────────────────────────────────────────
172+
173+
/**
 * Prints the banner, runs the three error-handling examples sequentially
 * inside one Traceloop workflow, and prints a completion banner.
 *
 * `traceloop.forceFlush()` is awaited in a `finally` block so it still runs
 * if the workflow itself rejects. The original error then propagates to the
 * caller.
 */
async function main(): Promise<void> {
  const rule = "═".repeat(60);

  console.log(`\n${rule}`);
  console.log(" GUARDRAILS ERROR HANDLING EXAMPLE");
  console.log(
    ` Backend: ${process.env.TRACELOOP_BASE_URL ?? "https://api.traceloop.dev"}`,
  );
  console.log(`${rule}`);
  console.log(
    "\n Real guard errors throw GuardExecutionError — never silently",
  );
  console.log(" treated as logical failures. Check spans for ERROR status.\n");

  try {
    await traceloop.withWorkflow(
      { name: "guardrails-error-handling-workflow" },
      async () => {
        // Each example catches and reports its own guard error.
        await example1_validateThrows();
        await example2_runThrows();
        await example3_parallelRunAllThrows();
      },
    );

    console.log(`\n${rule}`);
    console.log(" ALL EXAMPLES COMPLETE");
    console.log(`${rule}\n`);
  } finally {
    // Runs on success and on failure alike.
    await traceloop.forceFlush();
  }
}
200+
201+
// Handle rejection explicitly instead of leaving a floating promise: log the
// error and mark the process as failed.
main().catch((err: unknown) => {
  console.error(err);
  process.exitCode = 1;
});

0 commit comments

Comments
 (0)