PredictabilityAtScale
diff --git a/‎README.md‎
Lines changed: 15 additions & 2 deletions b/‎README.md‎
Lines changed: 15 additions & 2 deletions
diff --git a/‎docs/testing.md‎
Lines changed: 40 additions & 8 deletions b/‎docs/testing.md‎
Lines changed: 40 additions & 8 deletions
diff --git a/‎fixtures/prompts/hello.test.yaml‎
Lines changed: 4 additions & 0 deletions b/‎fixtures/prompts/hello.test.yaml‎
Lines changed: 4 additions & 0 deletions
diff --git a/‎src/cli/commands/init.ts‎
Lines changed: 57 additions & 1 deletion b/‎src/cli/commands/init.ts‎
Lines changed: 57 additions & 1 deletion
diff --git a/‎src/testing.ts‎
Lines changed: 143 additions & 0 deletions b/‎src/testing.ts‎
Lines changed: 143 additions & 0 deletions
@@ -69,9 +69,12 @@ This creates:
 prompts/
 ├── defaults.md         # Folder-level defaults (provider, model, metadata, system instructions)
 ├── hello.md            # Sample prompt with variables
-├── hello.test.yaml     # Test sidecar with sample inputs
+├── hello.test.yaml     # Test sidecar with sample inputs and hardcoded responses
 └── shared/
     └── tone.md         # Shared system instructions (included via composition)
+
+tests/
+└── hello.prompt.test.mjs # Executable starter test for the hello prompt
 ```
 
 ### 2. Write a prompt
@@ -540,11 +543,21 @@ Hello {{ name }}!`,
 ## Testing Helpers
 
 ```typescript
-import { createMockAsset, createMockResolvedAsset, parseTestPrompt } from 'promptopskit/testing';
+import {
+  createHardcodedPromptResponder,
+  createMockAsset,
+  createMockResolvedAsset,
+  loadPromptTestSidecar,
+  parseTestPrompt,
+} from 'promptopskit/testing';
 
 const asset = createMockAsset({ model: 'gpt-5.4' });
 const resolved = createMockResolvedAsset();
 const parsed = parseTestPrompt('---\nid: test\nschema_version: 1\n---\n\nHello');
+
+const sidecar = await loadPromptTestSidecar('./prompts/hello.test.yaml');
+const respond = createHardcodedPromptResponder(sidecar);
+const response = respond('basic-greeting');
 ```
 
 ## API Reference
 
@@ -19,7 +19,15 @@ npm run test:serial
 Import from `promptopskit/testing`:
 
 ```typescript
-import { createMockAsset, createMockResolvedAsset, parseTestPrompt } from 'promptopskit/testing';
+import {
+  createHardcodedPromptResponder,
+  createMockAsset,
+  createMockResolvedAsset,
+  getHardcodedPromptResponse,
+  loadPromptTestSidecar,
+  parseTestPrompt,
+  renderPromptTestCase,
+} from 'promptopskit/testing';
 ```
 
 ### `createMockAsset(overrides?)`
@@ -82,7 +90,7 @@ Hello {{ name }}!
 
 ## Test sidecar files
 
-By convention, test data for a prompt lives in a `.test.yaml` file alongside the prompt:
+By convention, test data for a prompt lives in a `.test.yaml` file alongside the prompt. `promptopskit init` creates `hello.md`, `hello.test.yaml`, and `tests/hello.prompt.test.mjs` so the starter prompt has executable test coverage immediately.
 
 ```
 prompts/
@@ -100,9 +108,13 @@ cases:
   - name: basic-greeting
     variables:
       name: "World"
+    response:
+      message: "Hello, World! How can I help you today?"
   - name: named-greeting
     variables:
       name: "Alice"
+    response:
+      message: "Hello, Alice! How can I help you today?"
 ```
 
 Each case has:
@@ -111,6 +123,7 @@ Each case has:
 |-------|------|-------------|
 | `name` | `string` | Test case name |
 | `variables` | `Record<string, string>` | Variable values for this case |
+| `response` | `unknown` | Optional hardcoded response for deterministic development and CI tests; `expected_response` is read as a fallback when `response` is absent |
 
 ### CLI integration
 
@@ -124,19 +137,38 @@ promptopskit render hello.md
 ### Using in tests
 
 ```typescript
-import { readFileSync } from 'node:fs';
-import { parse } from 'yaml';
 import { createPromptOpsKit } from 'promptopskit';
+import { loadPromptTestSidecar, renderPromptTestCase } from 'promptopskit/testing';
 
 const kit = createPromptOpsKit({ sourceDir: './prompts' });
-const sidecar = parse(readFileSync('./prompts/hello.test.yaml', 'utf-8'));
+const sidecar = await loadPromptTestSidecar('./prompts/hello.test.yaml');
 
 for (const testCase of sidecar.cases) {
-  const result = await kit.renderPrompt({
+  const { rendered, response } = await renderPromptTestCase(kit, {
+    sidecar,
+    caseName: testCase.name,
     path: 'hello',
     provider: 'openai',
-    variables: testCase.variables,
+    environment: 'dev',
+    strict: true,
   });
-  // Assert on result.request.body
+
+  // Assert on rendered.request.body and, when present, response.
 }
 ```
+
+### Hardcoded responses
+
+PromptOpsKit renders provider request bodies, but your app owns the network call. For unit tests and local development, keep deterministic responses in the sidecar and route your app through a tiny fake model runner:
+
+```typescript
+import { createHardcodedPromptResponder, loadPromptTestSidecar } from 'promptopskit/testing';
+
+const sidecar = await loadPromptTestSidecar('./prompts/hello.test.yaml');
+const respond = createHardcodedPromptResponder(sidecar);
+
+const result = respond('basic-greeting');
+// { message: 'Hello, World! How can I help you today?' }
+```
+
+This is intentionally different from GitHub Models. GitHub Models is useful for interactive prompt prototyping, side-by-side model comparison, and evaluations in GitHub; PromptOpsKit sidecars are repo-native fixtures for rendering, unit tests, CI, and deterministic app development without making provider calls.
@@ -3,7 +3,11 @@ cases:
     variables:
       name: "World"
       app_context: "Onboarding flow"
+    response:
+      message: "Hello, World! How can I help you today?"
   - name: named
     variables:
       name: "Alice"
       app_context: "Dashboard"
+    response:
+      message: "Hello, Alice! How can I help you today?"
@@ -1,5 +1,5 @@
 import { writeFile, mkdir } from 'node:fs/promises';
-import { join, dirname } from 'node:path';
+import { join, dirname, relative } from 'node:path';
 import { existsSync, readFileSync } from 'node:fs';
 
 const HELP = `
@@ -76,10 +76,14 @@ const TEST_SIDECAR = `cases:
     variables:
       name: "World"
       app_context: "Welcome screen"
+    response:
+      message: "Hello, World! How can I help you today?"
   - name: named-greeting
     variables:
       name: "Alice"
       app_context: "Settings page"
+    response:
+      message: "Hello, Alice! How can I help you today?"
 `;
 
 const EXAMPLE_USAGE = `// Example: render the hello prompt and send it to OpenAI
@@ -154,13 +158,17 @@ export async function init(args: string[]): Promise<void> {
   }
 
   const dir = args.find((a) => !a.startsWith('--')) ?? './prompts';
+  const testFilePath = join(dirname(dir), 'tests', 'hello.prompt.test.mjs');
+  const promptsDirFromTest = relative(dirname(testFilePath), dir) || '.';
+  const helloPromptTest = createHelloPromptTest(promptsDirFromTest);
 
   const files: Array<{ path: string; content: string }> = [
     { path: join(dir, 'defaults.md'), content: DEFAULTS },
     { path: join(dir, 'hello.md'), content: HELLO_PROMPT },
     { path: join(dir, 'hello.test.yaml'), content: TEST_SIDECAR },
     { path: join(dir, 'shared', 'tone.md'), content: TONE_INCLUDE },
     { path: join(dir, 'example-usage.ts'), content: EXAMPLE_USAGE },
+    { path: testFilePath, content: helloPromptTest },
   ];
 
   let created = 0;
@@ -190,8 +198,56 @@ export async function init(args: string[]): Promise<void> {
         console.log(`Tip: Add to your package.json scripts:`);
         console.log(`  "build:prompts": "promptopskit compile ${dir}"`);
       }
+      if (!pkg.scripts?.test) {
+        console.log();
+        console.log(`Tip: Add a test script to run the generated prompt test:`);
+        console.log(`  "test": "node --test tests/*.test.mjs"`);
+      }
     } catch {
       // Ignore parse errors
     }
   }
 }
+
+/**
+ * Build the source text of the starter `hello.prompt.test.mjs` file.
+ *
+ * @param promptsDirFromTest Path to the prompts directory, relative to the
+ *   directory that will contain the generated test file. It is embedded as a
+ *   JSON string literal and resolved against the test file's own location
+ *   (`import.meta.url`) when the generated test runs.
+ * @returns The complete module source as a string.
+ */
+function createHelloPromptTest(promptsDirFromTest: string): string {
+  return `import assert from 'node:assert/strict';
+import { dirname, resolve } from 'node:path';
+import test from 'node:test';
+import { fileURLToPath } from 'node:url';
+import { createPromptOpsKit } from 'promptopskit';
+import {
+  getHardcodedPromptResponse,
+  loadPromptTestSidecar,
+  renderPromptTestCase,
+} from 'promptopskit/testing';
+
+const testDir = dirname(fileURLToPath(import.meta.url));
+const promptsDir = resolve(testDir, ${JSON.stringify(promptsDirFromTest)});
+
+test('hello prompt renders every sidecar case', async () => {
+  const kit = createPromptOpsKit({ sourceDir: promptsDir, cache: false });
+  const sidecar = await loadPromptTestSidecar(resolve(promptsDir, 'hello.test.yaml'));
+
+  for (const testCase of sidecar.cases) {
+    const { rendered } = await renderPromptTestCase(kit, {
+      sidecar,
+      caseName: testCase.name,
+      path: 'hello',
+      provider: 'openai',
+      environment: 'dev',
+      strict: true,
+    });
+
+    assert.ok(rendered.request?.body?.messages);
+  }
+});
+
+test('hello prompt can return a deterministic response without calling a model', async () => {
+  const sidecar = await loadPromptTestSidecar(resolve(promptsDir, 'hello.test.yaml'));
+
+  assert.deepEqual(getHardcodedPromptResponse(sidecar, 'basic-greeting'), {
+    message: 'Hello, World! How can I help you today?',
+  });
+});
+`;
+}
@@ -1,6 +1,33 @@
+import { readFile } from 'node:fs/promises';
+
+import matter from 'gray-matter';
+
 import { parsePrompt } from './parser/index.js';
 import type { PromptAsset, ResolvedPromptAsset } from './schema/index.js';
 
+/**
+ * One named case from a `.test.yaml` sidecar.
+ */
+export interface PromptTestCase<TResponse = unknown> {
+  /** Case name; `getPromptTestCase` looks cases up by exact match on this value. */
+  name: string;
+  /** Variable values that `renderPromptTestCase` forwards to `renderPrompt`. */
+  variables?: Record<string, string>;
+  /** Canned response for this case; takes precedence over `expected_response`. */
+  response?: TResponse;
+  /** Fallback canned response, used when `response` is `null` or `undefined`. */
+  expected_response?: TResponse;
+}
+
+/**
+ * Parsed contents of a `.test.yaml` sidecar file.
+ */
+export interface PromptTestSidecar<TResponse = unknown> {
+  /** Test cases, in the order they appear in the sidecar. */
+  cases: Array<PromptTestCase<TResponse>>;
+}
+
+/**
+ * Minimal renderer shape required by `renderPromptTestCase`: any object with
+ * a compatible `renderPrompt` method.
+ */
+export interface PromptTestRenderer {
+  /**
+   * Render a prompt. `renderPromptTestCase` fills `variables` from the
+   * selected sidecar case and passes the remaining fields through unchanged.
+   */
+  renderPrompt(options: {
+    path?: string;
+    source?: string;
+    provider: string;
+    environment?: string;
+    tier?: string;
+    variables?: Record<string, string>;
+    strict?: boolean;
+  }): Promise<unknown>;
+}
+
 /**
  * Create a mock PromptAsset for testing.
  */
@@ -47,3 +74,119 @@ export function parseTestPrompt(source: string): PromptAsset {
   const { asset } = parsePrompt(source);
   return asset;
 }
+
+/**
+ * Parse the text of a `.test.yaml` sidecar file.
+ *
+ * The YAML is wrapped in front-matter fences and parsed with gray-matter.
+ *
+ * @param source Raw YAML text of the sidecar. A leading YAML document-start
+ *   marker (`---`) is accepted and ignored.
+ * @returns The sidecar, with every case verified to be an object that has a
+ *   non-empty string `name`.
+ * @throws Error if `cases` is missing or is not an array, if a case is not an
+ *   object, or if a case has no non-empty string `name`.
+ */
+export function parsePromptTestSidecar<TResponse = unknown>(source: string): PromptTestSidecar<TResponse> {
+  // A leading "---" document-start marker would sit directly under the opening
+  // fence and be taken for the closing fence, yielding empty data. Drop it.
+  const yaml = source.trim().replace(/^---[ \t]*(?:\r?\n|$)/, '');
+
+  // Passing an options object makes gray-matter bypass its content cache, so
+  // each call returns a fresh `data` object instead of one shared between
+  // callers that parse identical text.
+  const parsed = matter(`---\n${yaml}\n---\n`, {});
+  const data = parsed.data as Partial<PromptTestSidecar<TResponse>>;
+
+  if (!Array.isArray(data.cases)) {
+    throw new Error('Prompt test sidecar must include a "cases" array.');
+  }
+
+  return {
+    cases: data.cases.map((testCase, index) => {
+      if (!testCase || typeof testCase !== 'object') {
+        throw new Error(`Prompt test case at index ${index} must be an object.`);
+      }
+
+      if (typeof testCase.name !== 'string' || testCase.name.length === 0) {
+        throw new Error(`Prompt test case at index ${index} must include a non-empty "name".`);
+      }
+
+      return testCase;
+    }),
+  };
+}
+
+/**
+ * Read a `.test.yaml` sidecar from disk as UTF-8 text and parse it.
+ *
+ * @param filePath Location of the sidecar file (path string or `URL`).
+ * @returns The parsed sidecar.
+ */
+export async function loadPromptTestSidecar<TResponse = unknown>(
+  filePath: string | URL,
+): Promise<PromptTestSidecar<TResponse>> {
+  const yamlText = await readFile(filePath, 'utf-8');
+  return parsePromptTestSidecar<TResponse>(yamlText);
+}
+
+/**
+ * Look up a test case by exact name.
+ *
+ * @param sidecar A parsed sidecar, or a bare array of cases.
+ * @param name Name of the case to return.
+ * @returns The first case whose `name` equals `name`.
+ * @throws Error if no case has that name.
+ */
+export function getPromptTestCase<TResponse = unknown>(
+  sidecar: PromptTestSidecar<TResponse> | Array<PromptTestCase<TResponse>>,
+  name: string,
+): PromptTestCase<TResponse> {
+  const candidates = Array.isArray(sidecar) ? sidecar : sidecar.cases;
+
+  for (const candidate of candidates) {
+    if (candidate.name === name) {
+      return candidate;
+    }
+  }
+
+  throw new Error(`Prompt test case "${name}" was not found.`);
+}
+
+/**
+ * Return the canned response stored on a named case.
+ *
+ * `response` is preferred; `expected_response` is used when `response` is
+ * `null` or `undefined`.
+ *
+ * @param sidecar A parsed sidecar, or a bare array of cases.
+ * @param name Name of the case whose response is wanted.
+ * @returns The canned response.
+ * @throws Error if the case does not exist or the resulting response is `undefined`.
+ */
+export function getHardcodedPromptResponse<TResponse = unknown>(
+  sidecar: PromptTestSidecar<TResponse> | Array<PromptTestCase<TResponse>>,
+  name: string,
+): TResponse {
+  const match = getPromptTestCase(sidecar, name);
+  const canned = match.response ?? match.expected_response;
+
+  if (canned !== undefined) {
+    return canned;
+  }
+
+  throw new Error(`Prompt test case "${name}" does not define a "response".`);
+}
+
+/**
+ * Bind a sidecar to a lookup function for unit tests and local development.
+ *
+ * @param sidecar A parsed sidecar, or a bare array of cases.
+ * @returns A function that maps a case name to that case's canned response,
+ *   throwing under the same conditions as `getHardcodedPromptResponse`.
+ */
+export function createHardcodedPromptResponder<TResponse = unknown>(
+  sidecar: PromptTestSidecar<TResponse> | Array<PromptTestCase<TResponse>>,
+): (name: string) => TResponse {
+  return (caseName: string): TResponse => {
+    return getHardcodedPromptResponse<TResponse>(sidecar, caseName);
+  };
+}
+
+/**
+ * Render a prompt using the variables of a named sidecar case.
+ *
+ * The type of `rendered` follows the return type of the kit's `renderPrompt`,
+ * so the rendered request can be inspected without a cast. A kit typed as the
+ * plain `PromptTestRenderer` still yields `unknown`.
+ *
+ * @param kit Object exposing a `renderPrompt` method compatible with `PromptTestRenderer`.
+ * @param options Sidecar (or bare case array), the case name to use, and the
+ *   render options forwarded unchanged to `kit.renderPrompt`.
+ * @returns The matched case, the value resolved by `kit.renderPrompt`, and the
+ *   case's canned response (`response`, else `expected_response`) if any.
+ * @throws Error if no case named `options.caseName` exists; rejections from
+ *   `kit.renderPrompt` propagate.
+ */
+export async function renderPromptTestCase<TResponse = unknown, TRendered = unknown>(
+  kit: {
+    // Method syntax (not a property arrow) keeps parameter checking as
+    // permissive as it is for PromptTestRenderer itself.
+    renderPrompt(options: Parameters<PromptTestRenderer['renderPrompt']>[0]): Promise<TRendered>;
+  },
+  options: {
+    sidecar: PromptTestSidecar<TResponse> | Array<PromptTestCase<TResponse>>;
+    caseName: string;
+    path?: string;
+    source?: string;
+    provider: string;
+    environment?: string;
+    tier?: string;
+    strict?: boolean;
+  },
+): Promise<{
+  testCase: PromptTestCase<TResponse>;
+  rendered: TRendered;
+  response?: TResponse;
+}> {
+  const testCase = getPromptTestCase(options.sidecar, options.caseName);
+  const rendered = await kit.renderPrompt({
+    path: options.path,
+    source: options.source,
+    provider: options.provider,
+    environment: options.environment,
+    tier: options.tier,
+    variables: testCase.variables,
+    strict: options.strict,
+  });
+
+  return {
+    testCase,
+    rendered,
+    response: testCase.response ?? testCase.expected_response,
+  };
+}