Skip to content

Commit 91d1d21

Browse files
christso and Copilot authored
feat(core): support eval test vars templating (#1255)
* feat(core): support eval test vars templating
  Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
* fix(core): tighten eval vars loader typing
  Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
* style(schema): format generated eval schema
  Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---------
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
1 parent 567b625 commit 91d1d21

10 files changed

Lines changed: 344 additions & 73 deletions

File tree

apps/cli/src/commands/import/promptfoo.test.ts

Lines changed: 8 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -51,7 +51,8 @@ tests:
5151
expect(suite.tests[0]).toMatchObject({
5252
id: 'capital',
5353
criteria: 'Capital answer stays deterministic',
54-
input: 'Answer clearly: What is the capital of France?',
54+
input: 'Answer clearly: {{question}}',
55+
vars: { question: 'What is the capital of France?' },
5556
assertions: [{ type: 'equals', value: 'Paris' }],
5657
metadata: {
5758
promptfoo: {
@@ -95,7 +96,8 @@ tests: file://./tests.jsonl
9596
const yaml = await convertPromptfooToAgentvYaml(configPath);
9697
expect(yaml).toContain('# Converted from promptfoo config:');
9798
expect(yaml).toContain('id: math');
98-
expect(yaml).toContain('input: "Please answer: What is 2 + 2?"');
99+
expect(yaml).toContain('input: "Please answer: {{question}}"');
100+
expect(yaml).toContain('vars:');
99101
expect(yaml).toContain('type: equals');
100102
});
101103

@@ -129,7 +131,10 @@ tests: file://./tests.csv
129131
expect(suite.tests[0]).toMatchObject({
130132
id: 'capital-question',
131133
criteria: 'Capital question',
132-
input: 'Question: What is the capital of France?',
134+
input: 'Question: {{question}}',
135+
vars: {
136+
question: 'What is the capital of France?',
137+
},
133138
assertions: [
134139
{ type: 'equals', value: 'Paris' },
135140
{ type: 'contains', value: 'Paris' },

apps/cli/src/commands/import/promptfoo.ts

Lines changed: 10 additions & 29 deletions
Original file line number | Diff line number | Diff line change
@@ -74,6 +74,7 @@ interface AgentvAssertion {
7474
/**
 * A single test case in the AgentV eval suite emitted by the promptfoo importer.
 */
interface AgentvTest {
  /** Identifier of the test within the generated suite. */
  readonly id: string;
  /** Prompt for the test: either a plain string or a list of role/content messages. */
  readonly input: AgentvInput;
  /** Per-test template variables referenced by `{{name}}` placeholders in the input. */
  readonly vars?: Record<string, JsonValue>;
  /** Assertions attached to this specific test case. */
  readonly assertions?: readonly AgentvAssertion[];
  /** Additional keys (for example `criteria`, `metadata`, `execution`) are passed through as-is. */
  readonly [key: string]: unknown;
}
@@ -825,7 +826,8 @@ async function buildAgentvTests(options: {
825826
}
826827

827828
for (const prompt of promptSelection) {
828-
const renderedInput = renderPrompt(prompt, effectiveVars, testOptions);
829+
const importedVars = testOptions.disableVarExpansion ? undefined : effectiveVars;
830+
const templatedInput = buildPromptTemplate(prompt, testOptions);
829831
const promptSuffix =
830832
promptSelection.length > 1 ? `--${sanitizeName(prompt.key || prompt.label)}` : '';
831833
const metadata = buildPromptfooMetadata(rawTest, effectiveVars, prompt, effectiveTargets);
@@ -838,7 +840,8 @@ async function buildAgentvTests(options: {
838840
const test: AgentvTest = {
839841
id: `${explicitId ?? baseId}${promptSuffix}`,
840842
...(typeof rawTest.description === 'string' ? { criteria: rawTest.description } : {}),
841-
input: renderedInput,
843+
input: templatedInput,
844+
...(importedVars && Object.keys(importedVars).length > 0 ? { vars: importedVars } : {}),
842845
...(convertedCaseAssertions.length > 0 ? { assertions: convertedCaseAssertions } : {}),
843846
...(metadata ? { metadata } : {}),
844847
...(execution ? { execution } : {}),
@@ -970,52 +973,30 @@ function filterProviders(
970973
return matched.map((provider) => provider.targetName);
971974
}
972975

973-
function renderPrompt(
976+
/**
 * Build the AgentV input for a promptfoo prompt while keeping its `{{var}}`
 * placeholders intact instead of rendering them at import time.
 *
 * The test-level `prefix` / `suffix` options are applied around the prompt:
 * for a string prompt they wrap the whole string; for a message list the
 * prefix is prepended to the first message and the suffix appended to the
 * last message only.
 *
 * @param prompt - Prompt whose `content` is either a string or a list of messages.
 * @param testOptions - Test options supplying the optional `prefix` and `suffix`.
 * @returns The templated input in the same shape (string or message list) as the prompt.
 * @throws Error when the prompt content uses Nunjucks syntax rejected by `preserveTemplate`.
 */
function buildPromptTemplate(
  prompt: PromptfooPrompt,
  testOptions: PromptfooTestOptions,
): AgentvInput {
  const prefix = testOptions.prefix ?? '';
  const suffix = testOptions.suffix ?? '';

  if (typeof prompt.content === 'string') {
    return `${prefix}${preserveTemplate(prompt.content)}${suffix}`;
  }

  return prompt.content.map((message, index, allMessages) => ({
    role: message.role,
    // Prefix goes on the first message only, suffix on the last message only.
    content: `${index === 0 ? prefix : ''}${preserveTemplate(message.content)}${index === allMessages.length - 1 ? suffix : ''}`,
  }));
}
990992

991-
function renderTemplate(template: string, vars: Record<string, JsonValue>) {
993+
/**
 * Validate that a promptfoo prompt template only uses simple `{{var}}`
 * placeholders and return it unchanged so the placeholders survive the import.
 *
 * Templates containing Nunjucks block tags (`{%`), comments (`{#`) or a filter
 * pipe inside a `{{ ... }}` expression are rejected, because they cannot be
 * carried over as plain `{{var}}` templates.
 *
 * @param template - Raw prompt text taken from the promptfoo config.
 * @returns The same template string, untouched.
 * @throws Error when the template contains unsupported Nunjucks syntax.
 */
function preserveTemplate(template: string): string {
  if (template.includes('{%') || template.includes('{#') || /\{\{[^}]*\|/.test(template)) {
    throw new Error(
      `Unsupported Nunjucks syntax in prompt '${template.slice(0, 80)}'. Use simple {{var}} templates or migrate manually`,
    );
  }
  return template;
}
10201001

10211002
function buildPromptfooMetadata(

apps/web/src/content/docs/docs/evaluation/eval-files.mdx

Lines changed: 28 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -264,6 +264,34 @@ For local sources, omit `checkout.resolve`. If you need to pin the local clone t
264264
MY_REPO_LOCAL_PATH=/home/dev/repos/my-repo
265265
```
266266

267+
## Per-Test Template Variables
268+
269+
Eval YAML also supports per-test `vars` for data-driven prompt templates. Use `{{name}}` placeholders in test-facing text fields, and AgentV resolves them when the suite loads.
270+
271+
```yaml
272+
input: "Answer clearly: {{question}}"
273+
274+
tests:
275+
- id: capital
276+
vars:
277+
question: What is the capital of France?
278+
expected_answer: Paris
279+
criteria: "Answers {{question}} correctly"
280+
input:
281+
- role: user
282+
content: "Question: {{question}}"
283+
expected_output: "{{expected_answer}}"
284+
```
285+
286+
### Behavior
287+
288+
- `vars` is defined per test as an object
289+
- `{{name}}` and dotted paths like `{{ user.name }}` are supported
290+
- Substitution applies to suite-level `input`, test `input`, `input_files`, `criteria`, `expected_output`, and conversation turn `input` / `expected_output`
291+
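- When the whole string is a single placeholder, the variable's original JSON value (number, boolean, object, array, …) is substituted as-is; placeholders embedded in longer text insert strings verbatim and JSON-stringify any other value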
292+
- Missing variables are left unchanged, so unrelated template syntax is not silently blanked out
293+
- `vars` interpolation is separate from environment interpolation: `{{question}}` uses test data, `${{ PROJECT_NAME }}` uses environment variables
294+
267295
## JSONL Format
268296

269297
For large-scale evaluations, AgentV supports JSONL (JSON Lines) format. Each line is a single test:

apps/web/src/content/docs/docs/tools/import.mdx

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -46,6 +46,7 @@ Default output: `EVAL.yaml` beside the promptfoo config file.
4646
- inline tests and external YAML / JSON / JSONL / CSV test files
4747
- `defaultTest.assert` promoted to suite-level `assertions`
4848
- per-test `vars`, `description`, `threshold`, `metadata`, prompt filters, and provider filters
49+
- simple prompt templates are preserved as AgentV `{{var}}` input templates instead of being eagerly flattened
4950
- deterministic assertions that map directly to AgentV: `equals`, `contains`, `icontains`, `regex`, `starts-with`, `ends-with`, `contains-any`, `contains-all`, `icontains-any`, `icontains-all`, `is-json`, `latency`, `cost`
5051
- rubric-style assertions mapped to `llm-grader`: `llm-rubric`, `g-eval`, `factuality`, `context-faithfulness`, `context-recall`
5152

packages/core/src/evaluation/interpolation.ts

Lines changed: 75 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,8 @@
11
import type { EnvLookup } from './providers/types.js';
22

33
/** Matches every `${{ VAR }}` environment reference; group 1 is the variable name. */
const ENV_VAR_PATTERN = /\$\{\{\s*([A-Za-z_][A-Za-z0-9_]*)\s*\}\}/g;
/**
 * Matches every `{{ name }}` / `{{ dotted.path }}` per-test placeholder; group 1 is the path.
 * The `(?<!\$)` lookbehind skips the brace part of `${{ VAR }}` so environment references
 * are never mistaken for per-test placeholders.
 */
const TEMPLATE_VAR_PATTERN = /(?<!\$)\{\{\s*([A-Za-z_][A-Za-z0-9_.]*)\s*\}\}/g;
/** Matches a string that consists of exactly one per-test placeholder; group 1 is the path. */
const WHOLE_TEMPLATE_VAR_PATTERN = /^\{\{\s*([A-Za-z_][A-Za-z0-9_.]*)\s*\}\}$/;
46

57
/**
68
* Regex that matches a string consisting of exactly one `${{ VAR }}` reference
@@ -29,6 +31,42 @@ function coercePrimitive(value: string): unknown {
2931
return value;
3032
}
3133

34+
/**
 * Type guard for values that can be traversed as string-keyed records.
 *
 * Returns true for any non-null, non-array value whose `typeof` is `'object'`.
 * The prototype is not inspected, so class instances and built-ins such as
 * `Date` also pass.
 */
function isPlainObject(value: unknown): value is Record<string, unknown> {
  return typeof value === 'object' && value !== null && !Array.isArray(value);
}
37+
38+
/**
 * Deep-copy a template variable value so the caller can hand it out without
 * sharing nested arrays/objects with the original `vars`.
 *
 * Arrays and non-null objects are copied recursively (own enumerable string
 * keys only); every other value is returned as-is.
 *
 * @param value - Value to copy.
 * @returns A structurally equal copy that shares no arrays or objects with `value`.
 */
function cloneTemplateValue(value: unknown): unknown {
  if (Array.isArray(value)) {
    return value.map((item) => cloneTemplateValue(item));
  }
  if (typeof value === 'object' && value !== null) {
    // Object.fromEntries defines each key as an own data property. Plain
    // `copy[key] = ...` assignment would instead invoke the `__proto__` setter
    // when the source (e.g. JSON.parse output) has an own "__proto__" key.
    return Object.fromEntries(
      Object.entries(value).map(([key, nested]) => [key, cloneTemplateValue(nested)]),
    );
  }
  return value;
}
51+
52+
/**
 * Convert a resolved template variable into the text inserted into a
 * surrounding string.
 *
 * Strings are inserted verbatim; every other value is JSON-serialized.
 * `JSON.stringify` yields `undefined` for values that have no JSON form
 * (`undefined`, functions, symbols), so those fall back to `String(value)`
 * to keep the declared `string` return type truthful.
 *
 * @param value - Resolved variable value.
 * @returns The text to splice into the template.
 */
function stringifyTemplateValue(value: unknown): string {
  if (typeof value === 'string') return value;
  // Typed explicitly because lib.d.ts declares the result as `string` even
  // though it is `undefined` for non-serializable inputs.
  const serialized: string | undefined = JSON.stringify(value);
  return serialized ?? String(value);
}
56+
57+
/**
 * Resolve a dotted path such as `user.name` against the per-test vars.
 *
 * Each segment must name an OWN property of a non-null, non-array object.
 * Inherited members (`constructor`, `toString`, `__proto__`, ...) are treated
 * as missing, so a placeholder like `{{constructor}}` is reported as
 * unresolved instead of leaking an `Object.prototype` member.
 *
 * @param vars - Per-test variables to search.
 * @param expression - Variable name or dot-separated path.
 * @returns The value found at the path, or `undefined` when the expression is
 *   empty or any segment cannot be resolved.
 */
function lookupTemplateVar(
  vars: Readonly<Record<string, unknown>>,
  expression: string,
): unknown {
  if (!expression) return undefined;

  let current: unknown = vars;
  for (const segment of expression.split('.')) {
    // Only descend through objects; arrays and primitives end the lookup.
    if (typeof current !== 'object' || current === null || Array.isArray(current)) {
      return undefined;
    }
    if (!Object.prototype.hasOwnProperty.call(current, segment)) {
      return undefined;
    }
    // Safe: the guards above established a non-null, non-array object that owns `segment`.
    current = (current as Record<string, unknown>)[segment];
  }
  return current;
}
69+
3270
/**
3371
* Recursively interpolate `${{ VAR }}` references in all string values.
3472
* Missing variables resolve to empty string.
@@ -71,3 +109,40 @@ export function interpolateEnv(value: unknown, env: EnvLookup): unknown {
71109
}
72110
return value;
73111
}
112+
113+
/**
 * Recursively interpolate `{{ var }}` references in string values using per-test vars.
 *
 * - A string that is exactly one placeholder resolves to a copy of the variable's
 *   original value, so numbers, booleans, arrays and objects keep their JSON type.
 * - Placeholders embedded in longer text are replaced by their string form.
 * - Missing variables are left unchanged so unrelated template syntaxes remain intact.
 * - Arrays and objects are rebuilt with every element / property value interpolated;
 *   property keys are never interpolated.
 * - Any other value is returned as-is.
 *
 * @param value - Value (string, array, object or primitive) to interpolate.
 * @param vars - Per-test variables available to placeholders.
 * @returns The interpolated value; input arrays and objects are not mutated.
 */
export function interpolateTemplateVars(
  value: unknown,
  vars: Readonly<Record<string, unknown>>,
): unknown {
  if (typeof value === 'string') {
    // Whole-string placeholder: hand back the raw value instead of its string form.
    const wholeExpression = WHOLE_TEMPLATE_VAR_PATTERN.exec(value)?.[1];
    if (wholeExpression !== undefined) {
      const resolved = lookupTemplateVar(vars, wholeExpression);
      return resolved === undefined ? value : cloneTemplateValue(resolved);
    }

    return value.replace(TEMPLATE_VAR_PATTERN, (match, expression: string) => {
      const resolved = lookupTemplateVar(vars, expression);
      return resolved === undefined ? match : stringifyTemplateValue(resolved);
    });
  }

  if (Array.isArray(value)) {
    return value.map((item) => interpolateTemplateVars(item, vars));
  }

  if (isPlainObject(value)) {
    // Object.fromEntries defines own data properties, so an own "__proto__" key
    // in parsed input is copied rather than rewriting the result's prototype.
    return Object.fromEntries(
      Object.entries(value).map(([key, nested]) => [key, interpolateTemplateVars(nested, vars)]),
    );
  }

  return value;
}

packages/core/src/evaluation/validation/eval-file.schema.ts

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -25,11 +25,13 @@ const MessageSchema = z.object({
2525
content: MessageContentSchema,
2626
});
2727

28+
const JsonObjectSchema = z.object({}).catchall(z.unknown());
29+
2830
/** Input: string shorthand or message array */
2931
const InputSchema = z.union([z.string(), z.array(MessageSchema)]);
3032

3133
/** Expected output: string, object, or message array */
32-
const ExpectedOutputSchema = z.union([z.string(), z.record(z.unknown()), z.array(MessageSchema)]);
34+
const ExpectedOutputSchema = z.union([z.string(), JsonObjectSchema, z.array(MessageSchema)]);
3335

3436
// ---------------------------------------------------------------------------
3537
// Grader schemas (YAML input format)
@@ -389,6 +391,7 @@ const ConversationTurnSchema = z.object({
389391

390392
const EvalTestSchema = z.object({
391393
id: z.string().min(1),
394+
vars: JsonObjectSchema.optional(),
392395
criteria: z.string().optional(),
393396
input: InputSchema.optional(),
394397
input_files: z.array(z.string()).optional(),

0 commit comments

Comments
 (0)