EntityProcess
diff --git a/README.md b/README.md
Lines changed: 14 additions & 4 deletions
diff --git a/apps/cli/src/commands/eval/run-eval.ts b/apps/cli/src/commands/eval/run-eval.ts
Lines changed: 13 additions & 10 deletions
diff --git a/apps/cli/src/commands/eval/task-bundle.ts b/apps/cli/src/commands/eval/task-bundle.ts
Lines changed: 24 additions & 4 deletions
diff --git a/apps/cli/src/commands/pipeline/input.ts b/apps/cli/src/commands/pipeline/input.ts
Lines changed: 9 additions & 4 deletions
diff --git a/apps/cli/src/commands/pipeline/run.ts b/apps/cli/src/commands/pipeline/run.ts
Lines changed: 9 additions & 4 deletions
diff --git a/apps/cli/test/commands/eval/artifact-writer.test.ts b/apps/cli/test/commands/eval/artifact-writer.test.ts
Lines changed: 22 additions & 3 deletions
diff --git a/apps/cli/test/commands/eval/bundle.test.ts b/apps/cli/test/commands/eval/bundle.test.ts
Lines changed: 12 additions & 3 deletions
diff --git a/apps/cli/test/commands/eval/pipeline/fixtures/builtin-test.eval.yaml b/apps/cli/test/commands/eval/pipeline/fixtures/builtin-test.eval.yaml
Lines changed: 8 additions & 3 deletions
@@ -88,9 +88,13 @@ evaluate_options:
 
 default_test: file://./default-test.yaml
 
+prompts:
+  - "{{ input }}"
+
 tests:
   - id: fizzbuzz
-    input: Write FizzBuzz in Python. Use lowercase output strings "fizz", "buzz", and "fizzbuzz". Return only one Python code block.
+    vars:
+      input: Write FizzBuzz in Python. Use lowercase output strings "fizz", "buzz", and "fizzbuzz". Return only one Python code block.
     assert:
       - type: contains
         value: "fizz"
@@ -135,9 +139,13 @@ evaluate_options:
 default_test:
   threshold: 0.85
 
+prompts:
+  - "{{ input }}"
+
 tests:
   - id: fizzbuzz
-    input: Write FizzBuzz in Python
+    vars:
+      input: Write FizzBuzz in Python
 ```
 
 `target: local-openai` resolves the configured target id from `.agentv/config.yaml` and uses its provider, model, hooks, and provider settings. The object form above defines a full eval-local target and must include enough provider configuration to run. AgentV records the resolved target information in run artifacts so results can be audited and replayed. The `tags.experiment` label stays `with-skills` because the condition is unchanged; the model/provider variation belongs to the resolved target metadata.
@@ -214,10 +222,11 @@ const { results, summary } = await evaluate({
   experiment: 'with-skills',
   task: async (input) => runMyAppTarget(input),
   threshold: 0.8,
+  prompts: ['{{ input }}'],
   tests: [
     {
       id: 'fizzbuzz',
-      input: 'Write FizzBuzz in Python',
+      vars: { input: 'Write FizzBuzz in Python' },
       assert: [
         { type: 'contains', value: 'fizz' },
         'Implements correct FizzBuzz logic for multiples of 3, 5, and 15',
@@ -249,6 +258,7 @@ export default defineEval({
     earlyExit: false,
   },
   threshold: 0.8,
+  prompts: ['{{ input }}'],
   workspace: {
     scope: 'attempt',
     repos: [
@@ -262,7 +272,7 @@ export default defineEval({
   tests: [
     {
       id: 'fizzbuzz',
-      input: 'Write FizzBuzz in Python',
+      vars: { input: 'Write FizzBuzz in Python' },
       assert: [
         { type: 'contains', value: 'fizz' },
         'Implements correct FizzBuzz logic for multiples of 3, 5, and 15',
 
@@ -228,19 +228,22 @@ function buildPlannedResumeIdentityKeys(
     test.source?.evalFileAbsolutePath,
   ]);
   const suites = Array.from(new Set<string | null>([test.suite ?? null, null]));
+  const promptIds = Array.from(new Set<string | null>([test.prompt?.id ?? null, null]));
 
   for (const evalPath of evalPaths) {
     for (const suite of suites) {
-      keys.add(
-        JSON.stringify({
-          eval_path: evalPath,
-          suite,
-          test_id: test.id ?? 'unknown',
-          prompt_id: test.prompt?.id ?? null,
-          target: target ?? 'unknown',
-          variant: variant ?? null,
-        }),
-      );
+      for (const promptId of promptIds) {
+        keys.add(
+          JSON.stringify({
+            eval_path: evalPath,
+            suite,
+            test_id: test.id ?? 'unknown',
+            prompt_id: promptId,
+            target: target ?? 'unknown',
+            variant: variant ?? null,
+          }),
+        );
+      }
     }
   }
 
 
@@ -19,6 +19,7 @@ const TASK_EVAL_FILENAME = 'EVAL.yaml';
 const TASK_TARGETS_FILENAME = 'targets.yaml';
 const TASK_FILES_DIRNAME = 'files';
 const TASK_GRADERS_DIRNAME = 'graders';
+const INPUT_PROMPT = '{{ input }}';
 const BUNDLE_EVALS_DIRNAME = 'evals';
 const BUNDLE_MANIFEST_FILENAME = 'agentv_bundle.json';
 const BUNDLE_TARGETS_FILENAME = 'targets.yaml';
@@ -499,6 +500,20 @@ function withoutLegacyAssertionKeys(testCase: Record<string, unknown>): Record<s
   );
 }
 
+function moveInputToVars(testCase: Record<string, unknown>): Record<string, unknown> { // Relocates a test case's top-level `input` into `vars.input`.
+  if (!Object.hasOwn(testCase, 'input')) { // Only an own `input` key triggers the move.
+    return testCase; // No `input`: the same object is returned, not a copy.
+  }
+  const { input, input_files: _inputFiles, ...caseWithoutInput } = testCase; // `input_files` is stripped and not carried into the result.
+  return {
+    ...caseWithoutInput, // Every other key of the test case is kept as-is.
+    vars: {
+      ...(isRecord(testCase.vars) ? testCase.vars : {}), // Keeps existing vars; `isRecord` is defined elsewhere (presumably a plain-object guard, confirm).
+      input, // Spread last, so the top-level `input` replaces any existing `vars.input`.
+    },
+  };
+}
+
 function serializeGraderDefinition(
   definition: Record<string, unknown>,
   rewrites: ReadonlyMap<string, string>,
@@ -518,14 +533,14 @@ function buildEvalCase(
   const testCase = rewritePathsDeep(parseSourceTestCase(test), rewrites) as Record<string, unknown>;
   const graderDefinitions = test.source?.graderDefinitions ?? [];
   if (graderDefinitions.length > 0) {
-    return {
+    return moveInputToVars({
       ...withoutLegacyAssertionKeys(testCase),
       assert: graderDefinitions.map((grader) =>
         serializeGraderDefinition(grader.definition, rewrites),
       ),
-    };
+    });
   }
-  return testCase;
+  return moveInputToVars(testCase);
 }
 
 function targetReferenceNames(target: TargetDefinition): readonly string[] {
@@ -789,7 +804,10 @@ function buildPortableEvalCase(
 ): Record<string, unknown> {
   const testCase = buildEvalCase(test, rewrites);
   testCase.id = test.id;
-  testCase.input = test.input.map((message) => serializeMessage(message, rewrites));
+  testCase.vars = {
+    ...(isRecord(testCase.vars) ? testCase.vars : {}),
+    input: test.input.map((message) => serializeMessage(message, rewrites)),
+  };
 
   if (test.criteria.trim().length > 0) {
     testCase.criteria = test.criteria;
@@ -1037,6 +1055,7 @@ export async function materializeTaskBundle(
 
   await writeYamlFile(evalPath, {
     target: options.targetName,
+    prompts: [INPUT_PROMPT],
     tests: [evalCase],
   });
   await writeYamlFile(targetsPath, { targets: serializeTargetDefinitions(targetDefinitions) });
@@ -1107,6 +1126,7 @@ export async function materializeEvalBundle(
 
   await writeYamlFile(evalPath, {
     ...(runtime ?? {}),
+    prompts: [INPUT_PROMPT],
     tests: options.tests.map((test) => buildPortableEvalCase(test, rewrites)),
   });
   await writeYamlFile(targetsPath, {
 
@@ -22,7 +22,7 @@ import { readFile } from 'node:fs/promises';
 import { mkdir, writeFile } from 'node:fs/promises';
 import { dirname, join, relative, resolve } from 'node:path';
 
-import type { GraderConfig, LlmGraderConfig, ScriptGraderConfig } from '@agentv/core';
+import type { GraderConfig, LlmBackedGraderConfig, ScriptGraderConfig } from '@agentv/core';
 
 /** Assertion types that can be graded deterministically without external scripts or LLMs. */
 const BUILTIN_ASSERTION_TYPES = new Set([
@@ -150,6 +150,7 @@ export const evalInputCommand = command({
       await writeJson(join(testDir, 'input.json'), {
         input: inputMessages,
         input_files: test.file_paths,
+        ...(test.description ? { description: test.description } : {}),
         metadata: test.metadata ?? {},
       });
 
@@ -266,12 +267,12 @@ async function writeGraderConfigs(
         weight: config.weight ?? 1.0,
         config: config.config ?? {},
       });
-    } else if (assertion.type === 'llm-grader') {
+    } else if (assertion.type === 'llm-grader' || assertion.type === 'llm-rubric') {
       if (!hasLlmGraders) {
         await mkdir(llmGradersDir, { recursive: true });
         hasLlmGraders = true;
       }
-      const config = assertion as LlmGraderConfig;
+      const config = assertion as LlmBackedGraderConfig;
       let promptContent = '';
 
       if (config.resolvedPromptPath) {
@@ -286,7 +287,7 @@ async function writeGraderConfigs(
 
       // For rubrics assertions, include the criteria array directly
       // so grader subagents can evaluate without needing a prompt file.
-      const rubrics = (config as LlmGraderConfig).rubrics;
+      const rubrics = config.rubrics;
       const rubricsData = rubrics?.map((r) => ({
         id: r.id,
         outcome: r.outcome,
@@ -298,7 +299,11 @@ async function writeGraderConfigs(
 
       await writeJson(join(llmGradersDir, `${config.name}.json`), {
         name: config.name,
+        type: config.type,
         prompt_content: promptContent,
+        ...(config.type === 'llm-rubric' && config.value !== undefined
+          ? { value: config.value }
+          : {}),
         ...(rubricsData && rubricsData.length > 0 ? { rubrics: rubricsData } : {}),
         weight: config.weight ?? 1.0,
         threshold: 0.5,
 
@@ -18,7 +18,7 @@ import { tmpdir } from 'node:os';
 import { dirname, join, relative, resolve } from 'node:path';
 
 import { deriveCategory, loadTestSuite } from '@agentv/core';
-import type { GraderConfig, LlmGraderConfig, ScriptGraderConfig } from '@agentv/core';
+import type { GraderConfig, LlmBackedGraderConfig, ScriptGraderConfig } from '@agentv/core';
 import { command, number, oneOf, option, optional, positional, string } from 'cmd-ts';
 
 import { buildDefaultRunDir } from '../eval/result-layout.js';
@@ -174,6 +174,7 @@ export const evalRunCommand = command({
       await writeJson(join(testDir, 'input.json'), {
         input: inputMessages,
         input_files: test.file_paths,
+        ...(test.description ? { description: test.description } : {}),
         metadata: test.metadata ?? {},
       });
 
@@ -463,12 +464,12 @@ async function writeGraderConfigs(
         weight: config.weight ?? 1.0,
         config: config.config ?? {},
       });
-    } else if (assertion.type === 'llm-grader') {
+    } else if (assertion.type === 'llm-grader' || assertion.type === 'llm-rubric') {
       if (!hasLlmGraders) {
         await mkdir(llmGradersDir, { recursive: true });
         hasLlmGraders = true;
       }
-      const config = assertion as LlmGraderConfig;
+      const config = assertion as LlmBackedGraderConfig;
       let promptContent = '';
       if (config.resolvedPromptPath) {
         try {
@@ -480,7 +481,7 @@ async function writeGraderConfigs(
         promptContent = config.prompt;
       }
       // For rubrics assertions, include the criteria array directly
-      const rubrics = (config as LlmGraderConfig).rubrics;
+      const rubrics = config.rubrics;
       const rubricsData = rubrics?.map((r) => ({
         id: r.id,
         outcome: r.outcome,
@@ -492,7 +493,11 @@ async function writeGraderConfigs(
 
       await writeJson(join(llmGradersDir, `${config.name}.json`), {
         name: config.name,
+        type: config.type,
         prompt_content: promptContent,
+        ...(config.type === 'llm-rubric' && config.value !== undefined
+          ? { value: config.value }
+          : {}),
         ...(rubricsData && rubricsData.length > 0 ? { rubrics: rubricsData } : {}),
         weight: config.weight ?? 1.0,
         threshold: 0.5,
 
@@ -2286,7 +2286,15 @@ describe('writeArtifactsFromResults', () => {
     const envFile = path.join(sourceRoot, '.env');
     await writeFile(
       evalFile,
-      ['api_key: literal-secret', 'tests:', '  - id: trace-case', '    input: hello'].join('\n'),
+      [
+        'api_key: literal-secret',
+        'prompts:',
+        '  - "{{ input }}"',
+        'tests:',
+        '  - id: trace-case',
+        '    vars:',
+        '      input: hello',
+      ].join('\n'),
     );
     await writeFile(inputFile, 'input fixture\n');
     await writeFile(promptFile, 'grade this response\n');
@@ -2427,7 +2435,8 @@ describe('writeArtifactsFromResults', () => {
     const [testCase] = parsedEval.tests as Record<string, unknown>[];
     const [assertion] = testCase.assert as Record<string, unknown>[];
     expect(parsedEval.target).toBe('gpt-4o');
-    expect(testCase.input).toBe('file://files/src/input.txt');
+    expect(parsedEval.prompts).toEqual(['{{ input }}']);
+    expect((testCase.vars as Record<string, unknown>).input).toBe('file://files/src/input.txt');
     expect(assertion.prompt).toBe('file://graders/src/grader.md');
     expect(assertion.prompt_script).toEqual([
       'bun',
@@ -2450,7 +2459,17 @@ describe('writeArtifactsFromResults', () => {
     await mkdir(path.dirname(evalFile), { recursive: true });
     await writeFile(
       evalFile,
-      ['tests:', '  - id: alpha', '    input: A', '  - id: beta', '    input: B'].join('\n'),
+      [
+        'prompts:',
+        '  - "{{ input }}"',
+        'tests:',
+        '  - id: alpha',
+        '    vars:',
+        '      input: A',
+        '  - id: beta',
+        '    vars:',
+        '      input: B',
+      ].join('\n'),
     );
     const sourceTests = ['alpha', 'beta'].map(
       (id) =>
 
@@ -149,7 +149,10 @@ tests: ../data/cases.yaml
       template: 'workspaces/workspace-template',
       hooks: { before_each: { command: ['bun', 'scripts/scripts/setup.ts'] } },
     });
-    const input = testCase.input as Array<{ content: Array<Record<string, unknown>> }>;
+    expect(bundledEval.prompts).toEqual(['{{ input }}']);
+    const input = (testCase.vars as Record<string, unknown>).input as Array<{
+      content: Array<Record<string, unknown>>;
+    }>;
     expect(input[0]?.content[0]).toEqual({ type: 'file', value: 'files/data/input.txt' });
 
     const bundledTargets = await readFile(path.join(bundleDir, 'targets.yaml'), 'utf8');
@@ -181,12 +184,15 @@ tests: ../data/cases.yaml
   - id: candidate
     provider: mock
     response: '{"answer":"inline bundled response"}'
+prompts:
+  - "{{ input }}"
 tests:
   - id: inline-case
-    input: hello
     assert:
       - type: contains
         value: inline
+    vars:
+      input: hello
 `,
       'utf8',
     );
@@ -223,12 +229,15 @@ tests:
       path.join(sourceDir, 'evals', 'missing-template.eval.yaml'),
       `workspace:
   template: ../does-not-exist
+prompts:
+  - "{{ input }}"
 tests:
   - id: missing-template
-    input: hello
     assert:
       - type: contains
         value: Mock
+    vars:
+      input: hello
 `,
       'utf8',
     );
 
@@ -1,14 +1,19 @@
 name: builtin-test
+prompts:
+  - "{{ input }}"
 tests:
   - id: test-01
-    input: hello world
-    criteria: Response echoes the input
     assert:
+      - metric: echoes_input
+        type: llm-rubric
+        value: Response echoes the input
       - metric: has_hello
         type: contains
         value: hello
       - metric: matches_pattern
         type: regex
-        value: "h[aeiou]llo"
+        value: h[aeiou]llo
       - metric: is_valid_json
         type: is-json
+    vars:
+      input: hello world