Skip to content

Commit dd89e8d

Browse files
feat(evaluators): add standalone evaluator create/list/get/update/run SDK methods (#908)
1 parent 643425a commit dd89e8d

File tree

5 files changed

+1433
-0
lines changed

5 files changed

+1433
-0
lines changed

packages/traceloop-sdk/src/lib/client/evaluator/evaluator.ts

Lines changed: 301 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,14 +5,315 @@ import type {
55
TriggerEvaluatorRequest,
66
TriggerEvaluatorResponse,
77
InputSchemaMapping,
8+
CreateCustomEvaluatorRequest,
9+
EvaluatorUpdateRequest,
10+
EvaluatorExecuteOptions,
11+
EvaluatorCreateResponse,
12+
EvaluatorUpdateResponse,
13+
EvaluatorExecuteResponse,
14+
EvaluatorSource,
15+
EvaluatorCatalogItem,
16+
EvaluatorData,
17+
PropertySchema,
818
} from "../../interfaces/evaluator.interface";
919
import type { ExecutionResponse } from "../../interfaces/experiment.interface";
1020

21+
/**
 * Converts a camelCase (or PascalCase) key to snake_case.
 *
 * Every ASCII uppercase letter is lower-cased and prefixed with an
 * underscore, except when it is the first character of the key, so
 * "maxTokens" becomes "max_tokens" and "TopP" becomes "top_p".
 *
 * @param key The key to convert
 * @returns The snake_case form of the key
 */
function toSnakeCase(key: string): string {
  return key.replace(
    /[A-Z]/g,
    // The second replacer argument is the match offset; skipping the
    // underscore at offset 0 avoids emitting keys such as "_top_p".
    (upper: string, offset: number) =>
      (offset === 0 ? "" : "_") + upper.toLowerCase(),
  );
}
24+
1125
export class Evaluator extends BaseDatasetEntity {
1226
/**
 * Builds the evaluator API wrapper.
 * @param client The Traceloop client handed to the base entity
 */
constructor(client: TraceloopClient) {
  // No state of its own: everything is delegated to the base class.
  super(client);
}
1529

30+
// ─── Standalone evaluator methods ────────────────────────────────────────
31+
32+
/**
 * Creates a new LLM-as-a-judge custom evaluator without binding it to any project or environment.
 *
 * The options are converted to a snake_case payload and POSTed to `/v2/evaluators`.
 *
 * @param options The evaluator configuration including name, messages, provider, model, and schemas
 * @returns The `evaluatorId` (exposed as `id`) and `slug` taken from the response
 * @throws Error if the API request fails
 *
 * @example
 * const result = await client.evaluator.create({
 *   name: "Quality Evaluator",
 *   provider: "openai",
 *   model: "gpt-4o",
 *   messages: [
 *     { role: "system", content: "You are a strict quality evaluator." },
 *     { role: "user", content: "Evaluate: {{text}}" },
 *   ],
 *   inputSchema: [{ name: "text", type: "string" }],
 *   outputSchema: [{ name: "passed", type: "boolean" }],
 * });
 * console.log(result.id, result.slug);
 */
async create(
  options: CreateCustomEvaluatorRequest,
): Promise<EvaluatorCreateResponse> {
  const response = await this.client.post(
    "/v2/evaluators",
    this.buildPayload(options),
  );
  const { evaluatorId, slug } = await this.handleResponse(response);
  return { id: evaluatorId, slug };
}
63+
64+
/**
 * Lists all evaluators for the organization, optionally filtered by source.
 *
 * @param source Optional filter — "custom" for user-created evaluators, "prebuilt" for Traceloop built-in evaluators, omit to get all
 * @returns The `evaluators` array from the response, or an empty array when the response does not contain one
 * @throws Error if the API request fails or if an invalid source is provided
 *
 * @example
 * // Get all evaluators
 * const all = await client.evaluator.list();
 *
 * @example
 * // Get only custom evaluators
 * const custom = await client.evaluator.list("custom");
 */
async list(source?: EvaluatorSource): Promise<EvaluatorCatalogItem[]> {
  // Encode the filter so a value supplied by an untyped caller cannot inject
  // extra query parameters; the documented values are unaffected by encoding.
  const query = source ? `?source=${encodeURIComponent(source)}` : "";
  const response = await this.client.get(`/v2/evaluators${query}`);
  const data = await this.handleResponse(response);
  return (
    Array.isArray(data.evaluators) ? data.evaluators : []
  ) as EvaluatorCatalogItem[];
}
86+
87+
/**
 * Retrieves the full configuration of a single evaluator by ID or slug.
 *
 * @param identifier The evaluator's ID (e.g. "cmb6nr...") or slug (e.g. "my-quality-evaluator")
 * @returns Full evaluator details including config, provider, model, messages, and schemas
 * @throws Error if the identifier is blank, if the evaluator is not found, or if the config is missing required fields
 *
 * @example
 * // Get by ID
 * const evaluator = await client.evaluator.get("cmb6nr...");
 *
 * @example
 * // Get by slug
 * const evaluator = await client.evaluator.get("my-quality-evaluator");
 * console.log(evaluator.provider, evaluator.model);
 */
async get(identifier: string): Promise<EvaluatorData> {
  this.validateIdentifier(identifier);
  // The identifier is user supplied, so it is escaped before entering the path.
  const path = `/v2/evaluators/${encodeURIComponent(identifier)}`;
  const raw = await this.handleResponse(await this.client.get(path));
  return this.toEvaluatorData(raw);
}
110+
111+
/**
 * Partially updates a custom evaluator. Only the fields you provide are changed.
 * To update the LLM config (provider, model, messages, etc.), pass the full config object — it replaces the existing one.
 *
 * Wire format built here: `name`, `input_schema` and `output_schema` are
 * top-level keys; `description`, `provider` and `messages` go under `config`;
 * model and sampling parameters go under `config.llm_config`.
 *
 * @param identifier The evaluator's ID or slug
 * @param patch The fields to update — all fields are optional, but at least one must be provided
 * @returns The updated evaluator's ID
 * @throws Error if the identifier is blank, if the evaluator is not found, if no fields are provided, or if the API request fails
 *
 * @example
 * // Update name only
 * await client.evaluator.update("my-quality-evaluator", { name: "Updated Name" });
 *
 * @example
 * // Update config and schemas
 * await client.evaluator.update("cmb6nr...", {
 *   provider: "anthropic",
 *   model: "claude-3-5-sonnet",
 *   messages: [{ role: "user", content: "Evaluate: {{text}}" }],
 *   inputSchema: [{ name: "text", type: "string" }],
 *   outputSchema: [{ name: "passed", type: "boolean" }],
 * });
 */
async update(
  identifier: string,
  patch: EvaluatorUpdateRequest,
): Promise<EvaluatorUpdateResponse> {
  this.validateIdentifier(identifier);

  // Keeps only the entries that were actually supplied, preserving key order.
  const definedOnly = (
    fields: Record<string, unknown>,
  ): Record<string, unknown> =>
    Object.fromEntries(
      Object.entries(fields).filter(([, value]) => value !== undefined),
    );

  // Shared by input and output schemas: camelCase SDK shape -> snake_case wire shape.
  const toWireSchema = (schema: EvaluatorUpdateRequest["inputSchema"]) =>
    schema?.map((p) => ({
      name: p.name,
      type: p.type,
      description: p.description,
      enum_values: p.enumValues,
    }));

  const llmConfig = definedOnly({
    model: patch.model,
    temperature: patch.temperature,
    max_tokens: patch.maxTokens,
    top_p: patch.topP,
    frequency_penalty: patch.frequencyPenalty,
    presence_penalty: patch.presencePenalty,
  });

  // `llm_config` and `config` are only attached when they would be non-empty.
  const config = definedOnly({
    description: patch.description,
    provider: patch.provider,
    messages: patch.messages,
    llm_config: Object.keys(llmConfig).length > 0 ? llmConfig : undefined,
  });

  const payload = definedOnly({
    name: patch.name,
    input_schema: toWireSchema(patch.inputSchema),
    output_schema: toWireSchema(patch.outputSchema),
    config: Object.keys(config).length > 0 ? config : undefined,
  });

  // Fail fast instead of sending an empty PATCH, as the contract above promises.
  if (Object.keys(payload).length === 0) {
    throw new Error(
      "At least one field must be provided to update an evaluator",
    );
  }

  const response = await this.client.patch(
    `/v2/evaluators/${encodeURIComponent(identifier)}`,
    payload,
  );
  const data = await this.handleResponse(response);
  return { id: data.evaluator?.id };
}
207+
208+
/**
 * Runs an evaluator synchronously with the given input and returns the result.
 * The input keys must match the evaluator's input schema.
 *
 * The input is POSTed to the evaluator's `/executions` endpoint. The whole
 * parsed response is returned as `result`, and `executionId` falls back to
 * an empty string when the response does not carry one.
 *
 * @param identifier The evaluator's ID or slug
 * @param options The execution options containing the input values
 * @returns The execution ID together with the response payload
 * @throws Error if the identifier is blank, if the evaluator is not found, if the input is empty, or if the API request fails
 *
 * @example
 * const result = await client.evaluator.run("my-quality-evaluator", {
 *   input: { text: "The sky is blue because of Rayleigh scattering." },
 * });
 * console.log(result.result); // { passed: true, reason: "Factually accurate." }
 */
async run(
  identifier: string,
  options: EvaluatorExecuteOptions,
): Promise<EvaluatorExecuteResponse> {
  this.validateIdentifier(identifier);

  const endpoint = `/v2/evaluators/${encodeURIComponent(identifier)}/executions`;
  const body = { input: options.input };

  const response = await this.client.post(endpoint, body);
  const data = await this.handleResponse(response);

  const executionId = data.executionId ?? "";
  return { executionId, result: data };
}
237+
238+
// ─── Private helpers ──────────────────────────────────────────────────────
239+
240+
/**
 * Guards against empty or whitespace-only evaluator identifiers.
 * @param identifier The evaluator ID or slug supplied by the caller
 * @throws Error if the identifier is falsy or contains only whitespace
 */
private validateIdentifier(identifier: string): void {
  const isBlank = !identifier || identifier.trim().length === 0;
  if (isBlank) {
    throw new Error("Evaluator identifier must be a non-empty string");
  }
}
245+
246+
/**
 * Converts create options into the snake_case request payload.
 *
 * Top-level keys are snake_cased and `undefined` values are dropped. For
 * array values, each element that is a plain object gets the same treatment
 * one level deep; any other element (primitive, `null`, nested array) is
 * passed through untouched instead of being turned into an index-keyed
 * object or causing a TypeError. Non-array values are forwarded as-is.
 *
 * @param options The create request as supplied by the caller
 * @returns A new object suitable for JSON serialization
 */
private buildPayload(
  options: CreateCustomEvaluatorRequest,
): Record<string, unknown> {
  // Shallow key conversion that also strips undefined entries.
  const withSnakeCaseKeys = (source: object): Record<string, unknown> =>
    Object.fromEntries(
      Object.entries(source)
        .filter(([, value]) => value !== undefined)
        .map(([key, value]) => [toSnakeCase(key), value]),
    );

  // Only plain objects have keys worth converting; everything else is kept.
  const convertItem = (item: unknown): unknown =>
    typeof item === "object" && item !== null && !Array.isArray(item)
      ? withSnakeCaseKeys(item)
      : item;

  const payload = withSnakeCaseKeys(options);
  for (const [key, value] of Object.entries(payload)) {
    if (Array.isArray(value)) payload[key] = value.map(convertItem);
  }
  return payload;
}
266+
267+
/**
 * Flattens a raw evaluator response into the SDK's `EvaluatorData` shape.
 *
 * `provider` and `messages` are read from `data.config`, and the model and
 * sampling parameters from `data.config.llmConfig`. Optional sampling
 * parameters are only copied when they are not `undefined`.
 *
 * @param data The parsed response body for a single evaluator
 * @returns The evaluator with its config fields lifted to the top level
 * @throws Error if the config has no `provider` or no `llmConfig.model`
 */
private toEvaluatorData(data: any): EvaluatorData {
  const rawConfig = data.config ?? {};
  const llm = rawConfig.llmConfig ?? {};

  const provider: string = rawConfig.provider;
  if (!provider) {
    throw new Error("Evaluator config is missing required field: provider");
  }

  const model: string = llm.model;
  if (!model) {
    throw new Error("Evaluator config is missing required field: model");
  }

  // A missing schema is treated as an empty one.
  const mapSchema = (entries: any): PropertySchema[] =>
    (entries ?? []).map(
      (p: any): PropertySchema => ({
        name: p.name,
        type: p.type,
        description: p.description,
        enumValues: p.enumValues,
      }),
    );

  return {
    id: data.id,
    name: data.name,
    slug: data.slug,
    type: data.type ?? "",
    description: data.description ?? "",
    version: data.version,
    source: data.source,
    inputSchema: mapSchema(data.inputSchema),
    outputSchema: mapSchema(data.outputSchema),
    config: data.config,
    createdAt: data.createdAt,
    updatedAt: data.updatedAt,
    messages: rawConfig.messages ?? [],
    provider,
    model,
    // Spreading `false` adds nothing, so absent parameters stay absent.
    ...(llm.temperature !== undefined && { temperature: llm.temperature }),
    ...(llm.maxTokens !== undefined && { maxTokens: llm.maxTokens }),
    ...(llm.topP !== undefined && { topP: llm.topP }),
    ...(llm.frequencyPenalty !== undefined && {
      frequencyPenalty: llm.frequencyPenalty,
    }),
    ...(llm.presencePenalty !== undefined && {
      presencePenalty: llm.presencePenalty,
    }),
  };
}
316+
16317
/**
17318
* Run evaluators on experiment task results and wait for completion
18319
*/

packages/traceloop-sdk/src/lib/client/traceloop-client.ts

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,18 @@ export class TraceloopClient {
8484
});
8585
}
8686

87+
/**
 * Sends a PATCH request with a JSON-serialized body.
 *
 * The URL is `baseUrl` followed by `path`; the request carries the API key
 * as a bearer token and the SDK version in `X-Traceloop-SDK-Version`.
 *
 * @param path Path appended to the base URL
 * @param body Value serialized with `JSON.stringify` as the request body
 * @returns The promise returned by `fetch`
 */
async patch(path: string, body: Record<string, unknown> | any) {
  const url = `${this.baseUrl}${path}`;
  const headers = {
    "Content-Type": "application/json",
    Authorization: `Bearer ${this.apiKey}`,
    "X-Traceloop-SDK-Version": this.version,
  };
  return await fetch(url, {
    method: "PATCH",
    headers,
    body: JSON.stringify(body),
  });
}
98+
8799
async delete(path: string) {
88100
return await fetch(`${this.baseUrl}${path}`, {
89101
method: "DELETE",

0 commit comments

Comments
 (0)