@@ -5,14 +5,315 @@ import type {
55 TriggerEvaluatorRequest ,
66 TriggerEvaluatorResponse ,
77 InputSchemaMapping ,
8+ CreateCustomEvaluatorRequest ,
9+ EvaluatorUpdateRequest ,
10+ EvaluatorExecuteOptions ,
11+ EvaluatorCreateResponse ,
12+ EvaluatorUpdateResponse ,
13+ EvaluatorExecuteResponse ,
14+ EvaluatorSource ,
15+ EvaluatorCatalogItem ,
16+ EvaluatorData ,
17+ PropertySchema ,
818} from "../../interfaces/evaluator.interface" ;
919import type { ExecutionResponse } from "../../interfaces/experiment.interface" ;
1020
/**
 * Converts a camelCase key to snake_case by prefixing every ASCII uppercase
 * letter with an underscore and lowercasing it (e.g. `inputSchema` becomes
 * `input_schema`).
 *
 * Every uppercase letter is treated independently, so a key that starts with
 * an uppercase letter gains a leading underscore and runs of capitals are
 * split letter by letter.
 *
 * @param key The camelCase key to convert
 * @returns The snake_case form of `key`
 */
function toSnakeCase(key: string): string {
  return key.replace(/[A-Z]/g, (upper) => `_${upper.toLowerCase()}`);
}
24+
1125export class Evaluator extends BaseDatasetEntity {
/**
 * Builds the evaluator API wrapper.
 * @param client The Traceloop client, forwarded unchanged to the base-class constructor
 */
constructor(client: TraceloopClient) {
  super(client);
}
1529
30+ // ─── Standalone evaluator methods ────────────────────────────────────────
31+
/**
 * Creates a new LLM-as-a-judge custom evaluator without binding it to any project or environment.
 *
 * The options are converted to a snake_case payload by `buildPayload` and
 * POSTed to `/v2/evaluators`.
 *
 * @param options The evaluator configuration including name, messages, provider, model, and schemas
 * @returns The created evaluator's ID (read from `evaluatorId` in the response) and slug
 * @throws Propagates any error raised by `handleResponse` for the API response
 *
 * @example
 * const result = await client.evaluator.create({
 *   name: "Quality Evaluator",
 *   provider: "openai",
 *   model: "gpt-4o",
 *   messages: [
 *     { role: "system", content: "You are a strict quality evaluator." },
 *     { role: "user", content: "Evaluate: {{text}}" },
 *   ],
 *   inputSchema: [{ name: "text", type: "string" }],
 *   outputSchema: [{ name: "passed", type: "boolean" }],
 * });
 * console.log(result.id, result.slug);
 */
async create(
  options: CreateCustomEvaluatorRequest,
): Promise<EvaluatorCreateResponse> {
  const payload = this.buildPayload(options);
  const response = await this.client.post("/v2/evaluators", payload);
  const data = await this.handleResponse(response);
  return {
    id: data.evaluatorId,
    slug: data.slug,
  };
}
63+
/**
 * Lists all evaluators for the organization, optionally filtered by source.
 *
 * @param source Optional filter — "custom" for user-created evaluators, "prebuilt" for Traceloop built-in evaluators, omit to get all
 * @returns The `evaluators` array from the response, or an empty array when that field is not an array
 * @throws Propagates any error raised by `handleResponse`. The `source` value is not
 *   validated client-side; presumably the API rejects unknown values — confirm.
 *
 * @example
 * // Get all evaluators
 * const all = await client.evaluator.list();
 *
 * @example
 * // Get only custom evaluators
 * const custom = await client.evaluator.list("custom");
 */
async list(source?: EvaluatorSource): Promise<EvaluatorCatalogItem[]> {
  // Encode the filter so an unexpected value cannot inject extra query parameters.
  const query = source ? `?source=${encodeURIComponent(source)}` : "";
  const response = await this.client.get(`/v2/evaluators${query}`);
  const data = await this.handleResponse(response);
  return (
    Array.isArray(data.evaluators) ? data.evaluators : []
  ) as EvaluatorCatalogItem[];
}
86+
/**
 * Retrieves the full configuration of a single evaluator by ID or slug.
 *
 * @param identifier The evaluator's ID (e.g. "cmb6nr...") or slug (e.g. "my-quality-evaluator");
 *   it is URL-encoded before being placed in the request path
 * @returns Full evaluator details including config, provider, model, messages, and schemas
 * @throws Error if `identifier` fails `validateIdentifier`, or if the returned config
 *   lacks `provider` or `llmConfig.model`; errors raised by `handleResponse` also propagate
 *
 * @example
 * // Get by ID
 * const evaluator = await client.evaluator.get("cmb6nr...");
 *
 * @example
 * // Get by slug
 * const evaluator = await client.evaluator.get("my-quality-evaluator");
 * console.log(evaluator.provider, evaluator.model);
 */
async get(identifier: string): Promise<EvaluatorData> {
  this.validateIdentifier(identifier);
  const response = await this.client.get(
    `/v2/evaluators/${encodeURIComponent(identifier)}`,
  );
  const data = await this.handleResponse(response);
  return this.toEvaluatorData(data);
}
110+
/**
 * Partially updates a custom evaluator. Only the fields present in `patch` are sent.
 *
 * Request body layout built here:
 * - `name`, `input_schema`, `output_schema` are top-level keys.
 * - `description`, `provider`, `messages` are nested under `config`.
 * - `model`, `temperature`, `max_tokens`, `top_p`, `frequency_penalty` and
 *   `presence_penalty` are nested under `config.llm_config`.
 *
 * NOTE(review): earlier documentation stated that a provided config replaces the
 * existing one server-side; that cannot be confirmed from this method — verify
 * against the API before relying on merge vs. replace semantics.
 *
 * @param identifier The evaluator's ID or slug
 * @param patch The fields to update — all fields are optional, but at least one must be provided
 * @returns The updated evaluator's ID, read from `evaluator.id` in the response
 * @throws Error if `identifier` fails `validateIdentifier` or if `patch` contains no
 *   defined fields; errors raised by `handleResponse` also propagate
 *
 * @example
 * // Update name only
 * await client.evaluator.update("my-quality-evaluator", { name: "Updated Name" });
 *
 * @example
 * // Update config and schemas
 * await client.evaluator.update("cmb6nr...", {
 *   provider: "anthropic",
 *   model: "claude-3-5-sonnet",
 *   messages: [{ role: "user", content: "Evaluate: {{text}}" }],
 *   inputSchema: [{ name: "text", type: "string" }],
 *   outputSchema: [{ name: "passed", type: "boolean" }],
 * });
 */
async update(
  identifier: string,
  patch: EvaluatorUpdateRequest,
): Promise<EvaluatorUpdateResponse> {
  this.validateIdentifier(identifier);

  // Keeps only the entries the caller actually supplied.
  const withoutUndefined = (
    fields: Record<string, unknown>,
  ): Record<string, unknown> =>
    Object.fromEntries(
      Object.entries(fields).filter(([, value]) => value !== undefined),
    );

  const payload: Record<string, unknown> = {};
  if (patch.name !== undefined) payload.name = patch.name;

  // Both schemas share the same camelCase -> snake_case property mapping.
  const schemas = [
    ["input_schema", patch.inputSchema],
    ["output_schema", patch.outputSchema],
  ] as const;
  for (const [wireKey, schema] of schemas) {
    if (schema !== undefined) {
      payload[wireKey] = schema.map((property) => ({
        name: property.name,
        type: property.type,
        description: property.description,
        enum_values: property.enumValues,
      }));
    }
  }

  const llmConfig = withoutUndefined({
    model: patch.model,
    temperature: patch.temperature,
    max_tokens: patch.maxTokens,
    top_p: patch.topP,
    frequency_penalty: patch.frequencyPenalty,
    presence_penalty: patch.presencePenalty,
  });

  // `llm_config` is attached only when at least one LLM field was supplied.
  const config = withoutUndefined({
    description: patch.description,
    provider: patch.provider,
    messages: patch.messages,
    llm_config: Object.keys(llmConfig).length > 0 ? llmConfig : undefined,
  });
  if (Object.keys(config).length > 0) payload.config = config;

  // Fail fast instead of sending an empty PATCH body.
  if (Object.keys(payload).length === 0) {
    throw new Error("At least one field must be provided to update an evaluator");
  }

  const response = await this.client.patch(
    `/v2/evaluators/${encodeURIComponent(identifier)}`,
    payload,
  );
  const data = await this.handleResponse(response);
  return { id: data.evaluator?.id };
}
207+
/**
 * Runs an evaluator synchronously with the given input and returns the result.
 * The input keys must match the evaluator's input schema.
 *
 * @param identifier The evaluator's ID or slug; URL-encoded before being placed in the request path
 * @param options The execution options containing the input values
 * @returns `executionId` from the response (empty string when absent) and, as `result`,
 *   the whole response body returned by `handleResponse`
 * @throws Error if `identifier` fails `validateIdentifier` or if `options.input` is
 *   missing or has no keys; errors raised by `handleResponse` also propagate
 *
 * @example
 * const result = await client.evaluator.run("my-quality-evaluator", {
 *   input: { text: "The sky is blue because of Rayleigh scattering." },
 * });
 * console.log(result.result); // { passed: true, reason: "Factually accurate." }
 */
async run(
  identifier: string,
  options: EvaluatorExecuteOptions,
): Promise<EvaluatorExecuteResponse> {
  this.validateIdentifier(identifier);

  // Reject empty input before making the request, as the contract above promises.
  const input = options?.input;
  if (
    input === undefined ||
    input === null ||
    typeof input !== "object" ||
    Object.keys(input).length === 0
  ) {
    throw new Error("Evaluator input must be a non-empty object");
  }

  const response = await this.client.post(
    `/v2/evaluators/${encodeURIComponent(identifier)}/executions`,
    { input },
  );
  const data = await this.handleResponse(response);
  return {
    executionId: data.executionId ?? "",
    result: data,
  };
}
237+
238+ // ─── Private helpers ──────────────────────────────────────────────────────
239+
/**
 * Ensures an evaluator identifier is usable in a request path.
 * @param identifier The evaluator ID or slug supplied by the caller
 * @throws Error if `identifier` is not a string, is empty, or contains only whitespace
 */
private validateIdentifier(identifier: string): void {
  // The typeof check covers untyped JavaScript callers: a truthy non-string
  // would otherwise fail with a TypeError on `.trim()` rather than this error.
  if (typeof identifier !== "string" || identifier.trim() === "") {
    throw new Error("Evaluator identifier must be a non-empty string");
  }
}
245+
/**
 * Converts create-evaluator options into the snake_case request body.
 *
 * - Top-level entries whose value is `undefined` are dropped; remaining keys are
 *   converted with `toSnakeCase`.
 * - For array values, each plain-object item has its own keys converted the same
 *   way (one level deep, `undefined` entries dropped). Items that are not plain
 *   objects — primitives, `null`, nested arrays — are passed through unchanged.
 * - Non-array values are passed through unchanged.
 *
 * @param options The caller-supplied evaluator configuration
 * @returns A new object suitable for use as the request body
 */
private buildPayload(
  options: CreateCustomEvaluatorRequest,
): Record<string, unknown> {
  const isPlainObject = (value: unknown): value is Record<string, unknown> =>
    typeof value === "object" && value !== null && !Array.isArray(value);

  const snakeCaseKeys = (
    fields: Record<string, unknown>,
  ): Record<string, unknown> =>
    Object.fromEntries(
      Object.entries(fields)
        .filter(([, value]) => value !== undefined)
        .map(([key, value]) => [toSnakeCase(key), value]),
    );

  return Object.fromEntries(
    Object.entries(options)
      .filter(([, value]) => value !== undefined)
      .map(([key, value]) => [
        toSnakeCase(key),
        Array.isArray(value)
          ? value.map((item: unknown) =>
              // Only re-key real objects; Object.entries on a string would
              // explode it into index/character pairs and throws on null.
              isPlainObject(item) ? snakeCaseKeys(item) : item,
            )
          : value,
      ]),
  );
}
266+
/**
 * Maps a raw evaluator response object onto `EvaluatorData`.
 *
 * `provider` is read from `data.config.provider` and `model` from
 * `data.config.llmConfig.model`; both are required. The optional sampling
 * fields (`temperature`, `maxTokens`, `topP`, `frequencyPenalty`,
 * `presencePenalty`) are copied from `llmConfig` only when they are not
 * `undefined`. Schemas and messages fall back to empty arrays when the
 * response value is not an array.
 *
 * @param data The response body for a single evaluator (camelCase keys are read here)
 * @returns The flattened evaluator description
 * @throws Error if `provider` or `model` is missing or empty
 */
private toEvaluatorData(data: any): EvaluatorData {
  const config = data.config ?? {};
  const llmConfig = config.llmConfig ?? {};

  const provider: string = config.provider;
  const model: string = llmConfig.model;

  if (!provider)
    throw new Error("Evaluator config is missing required field: provider");
  if (!model)
    throw new Error("Evaluator config is missing required field: model");

  // Guard with Array.isArray so a malformed schema cannot crash on `.map`.
  const toPropertySchemas = (schema: unknown): PropertySchema[] =>
    Array.isArray(schema)
      ? schema.map((property) => ({
          name: property.name,
          type: property.type,
          description: property.description,
          enumValues: property.enumValues,
        }))
      : [];

  const result: EvaluatorData = {
    id: data.id,
    name: data.name,
    slug: data.slug,
    type: data.type ?? "",
    description: data.description ?? "",
    version: data.version,
    source: data.source,
    inputSchema: toPropertySchemas(data.inputSchema),
    outputSchema: toPropertySchemas(data.outputSchema),
    config: data.config,
    createdAt: data.createdAt,
    updatedAt: data.updatedAt,
    messages: Array.isArray(config.messages) ? config.messages : [],
    provider,
    model,
  };

  if (llmConfig.temperature !== undefined)
    result.temperature = llmConfig.temperature;
  if (llmConfig.maxTokens !== undefined)
    result.maxTokens = llmConfig.maxTokens;
  if (llmConfig.topP !== undefined) result.topP = llmConfig.topP;
  if (llmConfig.frequencyPenalty !== undefined)
    result.frequencyPenalty = llmConfig.frequencyPenalty;
  if (llmConfig.presencePenalty !== undefined)
    result.presencePenalty = llmConfig.presencePenalty;

  return result;
}
316+
16317 /**
17318 * Run evaluators on experiment task results and wait for completion
18319 */
0 commit comments