Skip to content

Commit fc25ab7

Browse files
committed
feat!: Add per-execution runId, at-most-once tracking, and cross-process tracker resumption (#1270)
1 parent 034a89d commit fc25ab7

15 files changed

Lines changed: 572 additions & 187 deletions

File tree

packages/sdk/server-ai/__tests__/Judge.test.ts

Lines changed: 22 additions & 52 deletions
Original file line number | Diff line number | Diff line change
@@ -47,14 +47,14 @@ describe('Judge', () => {
4747
],
4848
model: { name: 'gpt-4' },
4949
provider: { name: 'openai' },
50-
tracker: mockTracker,
50+
createTracker: () => mockTracker,
5151
evaluationMetricKey: 'relevance',
5252
};
5353
});
5454

5555
describe('constructor', () => {
5656
it('initializes with proper configuration', () => {
57-
const judge = new Judge(judgeConfig, mockTracker, mockProvider, mockLogger);
57+
const judge = new Judge(judgeConfig, mockProvider, mockLogger);
5858

5959
expect(judge).toBeDefined();
6060
});
@@ -64,7 +64,7 @@ describe('Judge', () => {
6464
let judge: Judge;
6565

6666
beforeEach(() => {
67-
judge = new Judge(judgeConfig, mockTracker, mockProvider, mockLogger);
67+
judge = new Judge(judgeConfig, mockProvider, mockLogger);
6868
});
6969

7070
it('evaluates AI response successfully', async () => {
@@ -209,12 +209,7 @@ describe('Judge', () => {
209209
evaluationMetricKey: undefined,
210210
evaluationMetricKeys: [],
211211
};
212-
const judgeWithoutMetrics = new Judge(
213-
configWithoutMetrics,
214-
mockTracker,
215-
mockProvider,
216-
mockLogger,
217-
);
212+
const judgeWithoutMetrics = new Judge(configWithoutMetrics, mockProvider, mockLogger);
218213

219214
const result = await judgeWithoutMetrics.evaluate('test input', 'test output');
220215

@@ -231,12 +226,7 @@ describe('Judge', () => {
231226
evaluationMetricKey: 'relevance',
232227
evaluationMetricKeys: undefined,
233228
};
234-
const judgeWithSingleKey = new Judge(
235-
configWithSingleKey,
236-
mockTracker,
237-
mockProvider,
238-
mockLogger,
239-
);
229+
const judgeWithSingleKey = new Judge(configWithSingleKey, mockProvider, mockLogger);
240230

241231
const mockStructuredResponse: StructuredResponse = {
242232
data: {
@@ -275,12 +265,7 @@ describe('Judge', () => {
275265
evaluationMetricKey: undefined,
276266
evaluationMetricKeys: ['relevance', 'accuracy'],
277267
};
278-
const judgeWithLegacyKeys = new Judge(
279-
configWithLegacyKeys,
280-
mockTracker,
281-
mockProvider,
282-
mockLogger,
283-
);
268+
const judgeWithLegacyKeys = new Judge(configWithLegacyKeys, mockProvider, mockLogger);
284269

285270
const mockStructuredResponse: StructuredResponse = {
286271
data: {
@@ -319,12 +304,7 @@ describe('Judge', () => {
319304
evaluationMetricKey: undefined,
320305
evaluationMetricKeys: ['', ' ', 'relevance', 'accuracy'],
321306
};
322-
const judgeWithInvalidKeys = new Judge(
323-
configWithInvalidKeys,
324-
mockTracker,
325-
mockProvider,
326-
mockLogger,
327-
);
307+
const judgeWithInvalidKeys = new Judge(configWithInvalidKeys, mockProvider, mockLogger);
328308

329309
const mockStructuredResponse: StructuredResponse = {
330310
data: {
@@ -364,7 +344,7 @@ describe('Judge', () => {
364344
evaluationMetricKey: 'helpfulness',
365345
evaluationMetricKeys: ['relevance', 'accuracy'],
366346
};
367-
const judgeWithBoth = new Judge(configWithBoth, mockTracker, mockProvider, mockLogger);
347+
const judgeWithBoth = new Judge(configWithBoth, mockProvider, mockLogger);
368348

369349
const mockStructuredResponse: StructuredResponse = {
370350
data: {
@@ -402,12 +382,7 @@ describe('Judge', () => {
402382
...judgeConfig,
403383
messages: undefined,
404384
};
405-
const judgeWithoutMessages = new Judge(
406-
configWithoutMessages,
407-
mockTracker,
408-
mockProvider,
409-
mockLogger,
410-
);
385+
const judgeWithoutMessages = new Judge(configWithoutMessages, mockProvider, mockLogger);
411386

412387
const result = await judgeWithoutMessages.evaluate('test input', 'test output');
413388

@@ -511,7 +486,7 @@ describe('Judge', () => {
511486
let judge: Judge;
512487

513488
beforeEach(() => {
514-
judge = new Judge(judgeConfig, mockTracker, mockProvider, mockLogger);
489+
judge = new Judge(judgeConfig, mockProvider, mockLogger);
515490
});
516491

517492
it('evaluates messages and response successfully', async () => {
@@ -596,7 +571,7 @@ describe('Judge', () => {
596571
let judge: Judge;
597572

598573
beforeEach(() => {
599-
judge = new Judge(judgeConfig, mockTracker, mockProvider, mockLogger);
574+
judge = new Judge(judgeConfig, mockProvider, mockLogger);
600575
});
601576

602577
it('constructs evaluation messages correctly', () => {
@@ -621,7 +596,7 @@ describe('Judge', () => {
621596
let judge: Judge;
622597

623598
beforeEach(() => {
624-
judge = new Judge(judgeConfig, mockTracker, mockProvider, mockLogger);
599+
judge = new Judge(judgeConfig, mockProvider, mockLogger);
625600
});
626601

627602
it('parses valid evaluation response correctly', () => {
@@ -633,7 +608,7 @@ describe('Judge', () => {
633608
},
634609
};
635610

636-
const result = parseResponse(responseData, 'relevance');
611+
const result = parseResponse(responseData, 'relevance', mockTracker);
637612

638613
expect(result).toEqual({
639614
relevance: { score: 0.8, reasoning: 'Good' },
@@ -647,7 +622,7 @@ describe('Judge', () => {
647622
relevance: { score: 0.8, reasoning: 'Good' },
648623
};
649624

650-
const result = parseResponse(responseData, 'relevance');
625+
const result = parseResponse(responseData, 'relevance', mockTracker);
651626

652627
expect(result).toEqual({});
653628
});
@@ -661,7 +636,7 @@ describe('Judge', () => {
661636
},
662637
};
663638

664-
const result = parseResponse(responseData, 'relevance');
639+
const result = parseResponse(responseData, 'relevance', mockTracker);
665640

666641
expect(result).toEqual({});
667642
});
@@ -675,7 +650,7 @@ describe('Judge', () => {
675650
},
676651
};
677652

678-
const result = parseResponse(responseData, 'relevance');
653+
const result = parseResponse(responseData, 'relevance', mockTracker);
679654

680655
expect(result).toEqual({});
681656
expect(mockLogger.warn).toHaveBeenCalledWith(
@@ -693,7 +668,7 @@ describe('Judge', () => {
693668
},
694669
};
695670

696-
const result = parseResponse(responseData, 'relevance');
671+
const result = parseResponse(responseData, 'relevance', mockTracker);
697672

698673
expect(result).toEqual({});
699674
expect(mockLogger.warn).toHaveBeenCalledWith(
@@ -711,7 +686,7 @@ describe('Judge', () => {
711686
},
712687
};
713688

714-
const result = parseResponse(responseData, 'relevance');
689+
const result = parseResponse(responseData, 'relevance', mockTracker);
715690

716691
expect(result).toEqual({});
717692
expect(mockLogger.warn).toHaveBeenCalledWith(
@@ -729,7 +704,7 @@ describe('Judge', () => {
729704
},
730705
};
731706

732-
const result = parseResponse(responseData, 'relevance');
707+
const result = parseResponse(responseData, 'relevance', mockTracker);
733708

734709
expect(result).toEqual({});
735710
expect(mockLogger.warn).toHaveBeenCalledWith(
@@ -744,12 +719,7 @@ describe('Judge', () => {
744719
evaluationMetricKey: undefined,
745720
evaluationMetricKeys: [],
746721
};
747-
const judgeWithEmptyKeys = new Judge(
748-
configWithEmptyKeys,
749-
mockTracker,
750-
mockProvider,
751-
mockLogger,
752-
);
722+
const judgeWithEmptyKeys = new Judge(configWithEmptyKeys, mockProvider, mockLogger);
753723

754724
const result = await judgeWithEmptyKeys.evaluate('test input', 'test output');
755725

@@ -769,7 +739,7 @@ describe('Judge', () => {
769739
},
770740
};
771741

772-
const result = parseResponse(responseData, 'relevance');
742+
const result = parseResponse(responseData, 'relevance', mockTracker);
773743

774744
expect(result).toEqual({});
775745
expect(mockLogger.warn).toHaveBeenCalledWith(
@@ -787,7 +757,7 @@ describe('Judge', () => {
787757
},
788758
};
789759

790-
const result = parseResponse(responseData, 'relevance');
760+
const result = parseResponse(responseData, 'relevance', mockTracker);
791761

792762
expect(result).toEqual({});
793763
expect(mockLogger.warn).toHaveBeenCalledWith(

0 commit comments

Comments
 (0)