diff --git a/src/cli.js b/src/cli.js
index 00c5431b5..b5a482218 100644
--- a/src/cli.js
+++ b/src/cli.js
@@ -8,7 +8,7 @@ import { buildGraph } from './builder.js';
 import { loadConfig } from './config.js';
 import { findCycles, formatCycles } from './cycles.js';
 import { findDbPath } from './db.js';
-import { buildEmbeddings, MODELS, search } from './embedder.js';
+import { buildEmbeddings, EMBEDDING_STRATEGIES, MODELS, search } from './embedder.js';
 import { exportDOT, exportJSON, exportMermaid } from './export.js';
 import { setVerbose } from './logger.js';
 import {
@@ -418,9 +418,12 @@ program
     console.log('\nAvailable embedding models:\n');
     for (const [key, config] of Object.entries(MODELS)) {
       const def = key === 'minilm' ? ' (default)' : '';
-      console.log(`  ${key.padEnd(12)} ${String(config.dim).padStart(4)}d  ${config.desc}${def}`);
+      const ctx = config.contextWindow ? `${config.contextWindow} ctx` : '';
+      console.log(
+        `  ${key.padEnd(12)} ${String(config.dim).padStart(4)}d  ${ctx.padEnd(9)} ${config.desc}${def}`,
+      );
     }
-    console.log('\nUsage: codegraph embed --model <name>');
+    console.log('\nUsage: codegraph embed --model <name> --strategy <structured|source>');
     console.log('       codegraph search "query" --model <name>\n');
   });
 
@@ -434,9 +437,20 @@ program
     'Embedding model: minilm (default), jina-small, jina-base, jina-code, nomic, nomic-v1.5, bge-large. Run `codegraph models` for details',
     'minilm',
   )
+  .option(
+    '-s, --strategy <name>',
+    `Embedding strategy: ${EMBEDDING_STRATEGIES.join(', ')}. "structured" uses graph context (callers/callees), "source" embeds raw code`,
+    'structured',
+  )
   .action(async (dir, opts) => {
+    if (!EMBEDDING_STRATEGIES.includes(opts.strategy)) {
+      console.error(
+        `Unknown strategy: ${opts.strategy}. Available: ${EMBEDDING_STRATEGIES.join(', ')}`,
+      );
+      process.exit(1);
+    }
     const root = path.resolve(dir || '.');
-    await buildEmbeddings(root, opts.model);
+    await buildEmbeddings(root, opts.model, undefined, { strategy: opts.strategy });
   });
 
 program
diff --git a/src/embedder.js b/src/embedder.js
index 6876a00e4..67eb39e5d 100644
--- a/src/embedder.js
+++ b/src/embedder.js
@@ -26,47 +26,56 @@ export const MODELS = {
   minilm: {
     name: 'Xenova/all-MiniLM-L6-v2',
     dim: 384,
+    contextWindow: 256,
     desc: 'Smallest, fastest (~23MB). General text.',
     quantized: true,
   },
   'jina-small': {
     name: 'Xenova/jina-embeddings-v2-small-en',
     dim: 512,
+    contextWindow: 8192,
     desc: 'Small, good quality (~33MB). General text.',
     quantized: false,
   },
   'jina-base': {
     name: 'Xenova/jina-embeddings-v2-base-en',
     dim: 768,
+    contextWindow: 8192,
     desc: 'Good quality (~137MB). General text, 8192 token context.',
     quantized: false,
   },
   'jina-code': {
     name: 'Xenova/jina-embeddings-v2-base-code',
     dim: 768,
+    contextWindow: 8192,
     desc: 'Code-aware (~137MB). Trained on code+text, best for code search.',
     quantized: false,
   },
   nomic: {
     name: 'Xenova/nomic-embed-text-v1',
     dim: 768,
+    contextWindow: 8192,
     desc: 'Good local quality (~137MB). 8192 context.',
     quantized: false,
   },
   'nomic-v1.5': {
     name: 'nomic-ai/nomic-embed-text-v1.5',
     dim: 768,
+    contextWindow: 8192,
     desc: 'Improved nomic (~137MB). Matryoshka dimensions, 8192 context.',
     quantized: false,
   },
   'bge-large': {
     name: 'Xenova/bge-large-en-v1.5',
     dim: 1024,
+    contextWindow: 512,
     desc: 'Best general retrieval (~335MB). Top MTEB scores.',
     quantized: false,
   },
 };
 
+export const EMBEDDING_STRATEGIES = Object.freeze(['structured', 'source']); // frozen: shared by CLI help text and validation
+
 export const DEFAULT_MODEL = 'minilm';
 const BATCH_SIZE_MAP = {
   minilm: 32,
@@ -89,6 +98,108 @@ function getModelConfig(modelKey) {
   return config;
 }
 
+/**
+ * Rough token estimate: ceil(length / 4), i.e. ~4 chars per token. A heuristic that avoids
+ * adding a tokenizer dependency, not an exact count. Nullish input is treated as empty (0).
+ */
+export function estimateTokens(text) {
+  return Math.ceil((text?.length ?? 0) / 4);
+}
+
+/**
+ * Extract the comment block (JSDoc, //, ///, #; a `#!` shebang is ignored) directly above
+ * `lines[fnLineIndex]`, looking back at most 15 lines. Returns the cleaned text or null if none.
+ */
+function extractLeadingComment(lines, fnLineIndex) {
+  const raw = [];
+  for (let i = fnLineIndex - 1; i >= Math.max(0, fnLineIndex - 15); i--) {
+    const trimmed = (lines[i] ?? '').trim(); // an index past the end of `lines` reads as blank
+    if (/^(\/\/|\/\*|\*\/|\*|#(?!!))/.test(trimmed)) {
+      raw.unshift(trimmed);
+    } else if (trimmed === '') {
+      if (raw.length > 0) break; // a blank line above the collected comment ends the block
+    } else {
+      break;
+    }
+  }
+  const text = raw
+    .map((line) =>
+      line
+        .replace(/^\/\*\*?\s?|\*\/$/g, '') // opening /** or /* and closing */
+        .replace(/^\*\s?/, '') // middle * lines
+        .replace(/^\/\/\/?\s?/, '') // // or ///
+        .replace(/^#\s?/, '') // # (Python/Ruby)
+        .trim(),
+    )
+    .filter((l) => l.length > 0)
+    .join(' ');
+  return text || null; // null when nothing was collected or only bare markers ("/**", "*/") were
+}
+
+/**
+ * Compose the compact, graph-aware embedding text for one symbol: a header line, its parameters,
+ * up to 10 callees and 10 callers, then the leading comment or, failing that, a 4-line snippet.
+ */
+function buildStructuredText(node, file, lines, calleesStmt, callersStmt) {
+  const declIdx = Math.max(0, node.line - 1);
+  const sections = [`${node.kind} ${node.name} (${splitIdentifier(node.name)}) in ${file}`];
+
+  // Shared formatter: at most 10 names, comma-separated
+  const listNames = (rows) =>
+    rows
+      .slice(0, 10)
+      .map((row) => row.name)
+      .join(', ');
+
+  // Parameters are best-effort: the first (...) group on the declaration line only
+  const declLine = lines[declIdx] || '';
+  const params = declLine.match(/\(([^)]*)\)/)?.[1]?.trim();
+  if (params) {
+    sections.push(`Parameters: ${params}`);
+  }
+
+  // Graph context: what this symbol calls
+  const calleeRows = calleesStmt.all(node.id);
+  if (calleeRows.length > 0) {
+    sections.push(`Calls: ${listNames(calleeRows)}`);
+  }
+
+  // Graph context: what calls this symbol
+  const callerRows = callersStmt.all(node.id);
+  if (callerRows.length > 0) {
+    sections.push(`Called by: ${listNames(callerRows)}`);
+  }
+
+  // A leading comment carries the most meaning, so it wins over raw code
+  const docComment = extractLeadingComment(lines, declIdx);
+  if (docComment) {
+    sections.push(docComment);
+    return sections.join('\n');
+  }
+
+  // No comment: embed the first few lines of the definition instead
+  const snippetEnd = Math.min(lines.length, declIdx + 4);
+  const snippet = lines.slice(declIdx, snippetEnd).join('\n').trim();
+  if (snippet) {
+    sections.push(snippet);
+  }
+
+  return sections.join('\n');
+}
+
+/**
+ * Build raw source-code text for a symbol (original strategy): a header line followed by the
+ * symbol's own source lines, or a 15-line window when the node has no end_line.
+ */
+function buildSourceText(node, file, lines) {
+  const first = Math.max(0, node.line - 1);
+  // end_line is used directly as the exclusive slice bound; both bounds are clamped to the file
+  const last = Math.min(lines.length, node.end_line ? node.end_line : first + 15);
+  const header = `${node.kind} ${node.name} (${splitIdentifier(node.name)}) in ${file}`;
+  const body = lines.slice(first, last).join('\n');
+  return [header, body].join('\n');
+}
+
 /**
  * Lazy-load @huggingface/transformers.
  * This is an optional dependency — gives a clear error if not installed.
@@ -203,10 +314,14 @@ function initEmbeddingsSchema(db) {
 
 /**
  * Build embeddings for all functions/methods/classes in the graph.
+ * @param {string} rootDir - Project root directory
+ * @param {string} modelKey - Model identifier from MODELS registry
+ * @param {string} [customDbPath] - Override path to graph.db
+ * @param {object} [options] - Embedding options
+ * @param {string} [options.strategy='structured'] - 'structured' (graph-enriched) or 'source' (raw code)
  */
-export async function buildEmbeddings(rootDir, modelKey, customDbPath) {
-  // path already imported at top
-  // fs already imported at top
+export async function buildEmbeddings(rootDir, modelKey, customDbPath, options = {}) {
+  const strategy = options.strategy || 'structured';
   const dbPath = customDbPath || findDbPath(null);
 
   const db = new Database(dbPath);
@@ -221,7 +336,24 @@ export async function buildEmbeddings(rootDir, modelKey, customDbPath) {
     )
     .all();
 
-  console.log(`Building embeddings for ${nodes.length} symbols...`);
+  console.log(`Building embeddings for ${nodes.length} symbols (strategy: ${strategy})...`);
+
+  // Prepare graph-context queries for structured strategy
+  let calleesStmt, callersStmt;
+  if (strategy === 'structured') {
+    calleesStmt = db.prepare(`
+      SELECT DISTINCT n.name FROM edges e
+      JOIN nodes n ON e.target_id = n.id
+      WHERE e.source_id = ? AND e.kind = 'calls'
+      ORDER BY n.name
+    `);
+    callersStmt = db.prepare(`
+      SELECT DISTINCT n.name FROM edges e
+      JOIN nodes n ON e.source_id = n.id
+      WHERE e.target_id = ? AND e.kind = 'calls'
+      ORDER BY n.name
+    `);
+  }
 
   const byFile = new Map();
   for (const node of nodes) {
@@ -232,6 +364,9 @@ export async function buildEmbeddings(rootDir, modelKey, customDbPath) {
   const texts = [];
   const nodeIds = [];
   const previews = [];
+  const config = getModelConfig(modelKey);
+  const contextWindow = config.contextWindow;
+  let overflowCount = 0;
 
   for (const [file, fileNodes] of byFile) {
     const fullPath = path.join(rootDir, file);
@@ -244,20 +379,31 @@ export async function buildEmbeddings(rootDir, modelKey, customDbPath) {
     }
 
     for (const node of fileNodes) {
-      const startLine = Math.max(0, node.line - 1);
-      const endLine = node.end_line
-        ? Math.min(lines.length, node.end_line)
-        : Math.min(lines.length, startLine + 15);
-      const context = lines.slice(startLine, endLine).join('\n');
-
-      const readable = splitIdentifier(node.name);
-      const text = `${node.kind} ${node.name} (${readable}) in ${file}\n${context}`;
+      let text =
+        strategy === 'structured'
+          ? buildStructuredText(node, file, lines, calleesStmt, callersStmt)
+          : buildSourceText(node, file, lines);
+
+      // Detect and handle context window overflow
+      const tokens = estimateTokens(text);
+      if (tokens > contextWindow) {
+        overflowCount++;
+        const maxChars = contextWindow * 4;
+        text = text.slice(0, maxChars);
+      }
+
       texts.push(text);
       nodeIds.push(node.id);
       previews.push(`${node.name} (${node.kind}) -- ${file}:${node.line}`);
     }
   }
 
+  if (overflowCount > 0) {
+    warn(
+      `${overflowCount} symbol(s) exceeded model context window (${contextWindow} tokens) and were truncated`,
+    );
+  }
+
   console.log(`Embedding ${texts.length} symbols...`);
   const { vectors, dim } = await embed(texts, modelKey);
 
@@ -269,16 +415,19 @@ export async function buildEmbeddings(rootDir, modelKey, customDbPath) {
     for (let i = 0; i < vectors.length; i++) {
       insert.run(nodeIds[i], Buffer.from(vectors[i].buffer), previews[i]);
     }
-    const config = getModelConfig(modelKey);
     insertMeta.run('model', config.name);
     insertMeta.run('dim', String(dim));
     insertMeta.run('count', String(vectors.length));
+    insertMeta.run('strategy', strategy);
     insertMeta.run('built_at', new Date().toISOString());
+    if (overflowCount > 0) {
+      insertMeta.run('truncated_count', String(overflowCount));
+    }
   });
   insertAll();
 
   console.log(
-    `\nStored ${vectors.length} embeddings (${dim}d, ${getModelConfig(modelKey).name}) in graph.db`,
+    `\nStored ${vectors.length} embeddings (${dim}d, ${config.name}, strategy: ${strategy}) in graph.db`,
   );
   db.close();
 }
diff --git a/src/index.js b/src/index.js
index a0caf3b4d..7435b8a61 100644
--- a/src/index.js
+++ b/src/index.js
@@ -21,7 +21,9 @@ export {
   buildEmbeddings,
   cosineSim,
   DEFAULT_MODEL,
+  EMBEDDING_STRATEGIES,
   embed,
+  estimateTokens,
   MODELS,
   multiSearchData,
   search,
diff --git a/tests/search/embedding-benchmark.js b/tests/search/embedding-benchmark.js
new file mode 100644
index 000000000..11dc9aad0
--- /dev/null
+++ b/tests/search/embedding-benchmark.js
@@ -0,0 +1,124 @@
+#!/usr/bin/env node
+
+/**
+ * Embedding strategy benchmark — compares structured vs source strategies
+ * against real search queries on the current project's graph.
+ *
+ * Prerequisites:
+ *   - @huggingface/transformers installed
+ *   - codegraph build already run (graph.db exists)
+ *
+ * Usage:
+ *   node tests/search/embedding-benchmark.js
+ *   node tests/search/embedding-benchmark.js --model minilm
+ */
+
+import path from 'node:path';
+import { buildEmbeddings, DEFAULT_MODEL, MODELS, searchData } from '../../src/embedder.js';
+
+// `--model <name>` picks the model; no flag, or a flag without a value, falls back to the default
+const modelFlagIdx = process.argv.indexOf('--model');
+const model = (modelFlagIdx !== -1 && process.argv[modelFlagIdx + 1]) || DEFAULT_MODEL;
+
+const rootDir = '.';
+const dbPath = path.resolve('.codegraph/graph.db'); // resolved against the current working directory
+
+// Ground truth: each query `q` is paired with `expect`, the symbol name whose rank is measured
+const QUERIES = [
+  { q: 'parse source code with tree-sitter', expect: 'parseFilesAuto' },
+  { q: 'find circular dependencies', expect: 'findCycles' },
+  { q: 'build dependency graph from source files', expect: 'buildGraph' },
+  { q: 'resolve import path to actual file', expect: 'resolveImportPath' },
+  { q: 'cosine similarity between vectors', expect: 'cosineSim' },
+  { q: 'export graph as DOT format', expect: 'exportDOT' },
+  { q: 'semantic search with embeddings', expect: 'search' },
+  { q: 'incremental file hashing', expect: 'hashFile' },
+  { q: 'load configuration from file', expect: 'loadConfig' },
+  { q: 'extract functions and classes from code', expect: 'extractJavaScript' },
+  { q: 'impact analysis of code changes', expect: 'diffImpactData' },
+  { q: 'start MCP server for AI agents', expect: 'startMCPServer' },
+  { q: 'watch files for changes', expect: 'watchProject' },
+  { q: 'reciprocal rank fusion for multi-query search', expect: 'multiSearchData' },
+];
+
+/** Rebuild embeddings with `strategy`, then score every query: Hit@1/3/5 plus one row each. */
+async function benchmark(strategy) {
+  await buildEmbeddings(rootDir, model, dbPath, { strategy });
+
+  let hits1 = 0;
+  let hits3 = 0;
+  let hits5 = 0;
+  const details = [];
+
+  for (const { q, expect: expected } of QUERIES) {
+    const data = await searchData(q, dbPath, { minScore: 0.01, limit: 10 });
+    // A missing result set is recorded as a miss so details[i] always matches QUERIES[i]
+    const results = data?.results ?? [];
+    const names = results.map((r) => r.name);
+    const rank = names.indexOf(expected) + 1; // 0 = not found
+    if (rank === 1) hits1++;
+    if (rank >= 1 && rank <= 3) hits3++;
+    if (rank >= 1 && rank <= 5) hits5++;
+    const matchScore = rank > 0 ? results[rank - 1].similarity.toFixed(3) : 'miss';
+    details.push({
+      q: q.slice(0, 50),
+      expected,
+      rank: rank || '>10',
+      actual: names[0],
+      matchScore,
+    });
+  }
+
+  return { strategy, hits1, hits3, hits5, total: QUERIES.length, details };
+}
+
+const modelConfig = MODELS[model]; // undefined when --model names an unknown model
+if (!modelConfig) throw new Error(`Unknown model: ${model}. Available: ${Object.keys(MODELS).join(', ')}`);
+console.log('=== Embedding Strategy Benchmark ===');
+console.log(`Model: ${model} (${modelConfig.dim}d, ${modelConfig.contextWindow} token context)`);
+console.log(`Queries: ${QUERIES.length}\n`); // trailing "\n" prints the blank separator line
+
+const structured = await benchmark('structured');
+const source = await benchmark('source');
+
+// Summary table: one row per Hit@k metric with the signed difference (structured - source)
+console.log('');
+console.log('=== RESULTS ===');
+console.log('');
+console.log(['Metric'.padEnd(12), 'structured'.padEnd(16), 'source'.padEnd(16), 'delta'].join(''));
+const hitCell = (run, metric) =>
+  `${run[metric]}/${run.total} (${((run[metric] / run.total) * 100).toFixed(0)}%)`;
+const METRIC_KEYS = { 'Hit@1': 'hits1', 'Hit@3': 'hits3', 'Hit@5': 'hits5' };
+for (const [label, metric] of Object.entries(METRIC_KEYS)) {
+  const diff = structured[metric] - source[metric];
+  const row = [
+    label.padEnd(12),
+    hitCell(structured, metric).padEnd(16),
+    hitCell(source, metric).padEnd(16),
+    `${diff > 0 ? '+' : ''}${diff}`,
+  ];
+  console.log(row.join(''));
+}
+
+// Per-query comparison: both ranks side by side
+console.log('');
+console.log(`${'Query'.padEnd(52)}${'Expected'.padEnd(22)}Struct  Source`);
+
+// '*' when `mine` is a numeric rank and `theirs` is either a non-numeric miss marker or a
+// strictly worse rank; a single space otherwise
+const winMark = (mine, theirs) => {
+  if (typeof mine !== 'number') return ' ';
+  return typeof theirs !== 'number' || mine < theirs ? '*' : ' ';
+};
+
+for (let idx = 0; idx < QUERIES.length; idx++) {
+  const structRow = structured.details[idx];
+  const sourceRow = source.details[idx];
+  const structCol = String(structRow.rank).padEnd(4) + winMark(structRow.rank, sourceRow.rank);
+  const sourceCol = String(sourceRow.rank).padEnd(4) + winMark(sourceRow.rank, structRow.rank);
+  console.log(
+    [structRow.q.padEnd(52), structRow.expected.padEnd(22), structCol, '   ', sourceCol].join(''),
+  );
+}
+console.log('');
+console.log('* = better rank for that query');
diff --git a/tests/search/embedding-strategy.test.js b/tests/search/embedding-strategy.test.js
new file mode 100644
index 000000000..5db82bb9d
--- /dev/null
+++ b/tests/search/embedding-strategy.test.js
@@ -0,0 +1,306 @@
+import fs from 'node:fs';
+import os from 'node:os';
+import path from 'node:path';
+import Database from 'better-sqlite3';
+import { afterAll, beforeAll, describe, expect, test, vi } from 'vitest';
+import { initSchema } from '../../src/db.js';
+
+// ─── Mock setup ────────────────────────────────────────────────────────
+
+// Capture texts passed to the embedding model (created via vi.hoisted so the vi.mock factory can use it)
+const { EMBEDDED_TEXTS } = vi.hoisted(() => ({
+  EMBEDDED_TEXTS: [],
+}));
+// Fake pipeline: records every input text and returns one fixed vector per text (no model is loaded)
+vi.mock('@huggingface/transformers', () => ({
+  pipeline: async () => async (batch) => {
+    const dim = 384; // same width as minilm's `dim` in MODELS
+    const data = new Float32Array(dim * batch.length);
+    for (let t = 0; t < batch.length; t++) {
+      EMBEDDED_TEXTS.push(batch[t]);
+      data[t * dim] = 0.5; // only the first two components are non-zero
+      data[t * dim + 1] = 0.3;
+    }
+    return { data };
+  },
+  cos_sim: () => 0,
+}));
+
+import {
+  buildEmbeddings,
+  EMBEDDING_STRATEGIES,
+  estimateTokens,
+  MODELS,
+} from '../../src/embedder.js';
+
+// ─── Helpers ───────────────────────────────────────────────────────────
+
+/** Insert one row into `nodes` and return its rowid (used as the node id when wiring edges). */
+function insertNode(db, name, kind, file, line, endLine) {
+  const sql = 'INSERT INTO nodes (name, kind, file, line, end_line) VALUES (?, ?, ?, ?, ?)';
+  return db.prepare(sql).run(name, kind, file, line, endLine).lastInsertRowid;
+}
+
+/**
+ * Insert one directed edge (sourceId → targetId) of the given kind into `edges`.
+ */
+function insertEdge(db, sourceId, targetId, kind) {
+  const stmt = db.prepare('INSERT INTO edges (source_id, target_id, kind) VALUES (?, ?, ?)');
+  stmt.run(sourceId, targetId, kind);
+}
+
+// ─── Fixture ───────────────────────────────────────────────────────────
+
+// Source files that match the DB nodes (each array entry is one file line; node lines are 1-indexed)
+const FIXTURE_FILES = {
+  'math.js': [
+    '/**',
+    ' * Add two numbers together.',
+    ' */',
+    'export function add(a, b) { return a + b; }', // line 4
+    'export function multiply(a, b) { return a * b; }', // line 5
+    'export function square(x) { return multiply(x, x); }', // line 6
+  ].join('\n'),
+  'utils.js': [
+    "import { add, square } from './math.js';",
+    'export function sumOfSquares(a, b) { return add(square(a), square(b)); }', // line 2
+    'export class Calculator {', // lines 3-5
+    '  compute(x, y) { return sumOfSquares(x, y); }', // line 4
+    '}',
+  ].join('\n'),
+};
+
+let tmpDir, dbPath;
+// Shared fixture: a temp project directory plus its graph.db, created once for the suites below
+beforeAll(() => {
+  tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'codegraph-strategy-test-'));
+
+  // Write source files
+  for (const [name, content] of Object.entries(FIXTURE_FILES)) {
+    fs.writeFileSync(path.join(tmpDir, name), content);
+  }
+
+  // Create DB with nodes + edges
+  const dbDir = path.join(tmpDir, '.codegraph');
+  fs.mkdirSync(dbDir, { recursive: true });
+  dbPath = path.join(dbDir, 'graph.db');
+
+  const db = new Database(dbPath);
+  db.pragma('journal_mode = WAL');
+  initSchema(db);
+
+  // math.js nodes (line numbers are 1-indexed and must match FIXTURE_FILES)
+  const addId = insertNode(db, 'add', 'function', 'math.js', 4, 4);
+  const multiplyId = insertNode(db, 'multiply', 'function', 'math.js', 5, 5);
+  const squareId = insertNode(db, 'square', 'function', 'math.js', 6, 6);
+
+  // utils.js nodes
+  const sumOfSquaresId = insertNode(db, 'sumOfSquares', 'function', 'utils.js', 2, 2);
+  insertNode(db, 'Calculator', 'class', 'utils.js', 3, 5); // id not kept: no edge references it
+  const computeId = insertNode(db, 'compute', 'method', 'utils.js', 4, 4);
+
+  // Call edges: square → multiply, sumOfSquares → add, sumOfSquares → square, compute → sumOfSquares
+  insertEdge(db, squareId, multiplyId, 'calls');
+  insertEdge(db, sumOfSquaresId, addId, 'calls');
+  insertEdge(db, sumOfSquaresId, squareId, 'calls');
+  insertEdge(db, computeId, sumOfSquaresId, 'calls');
+
+  db.close();
+});
+
+afterAll(() => {
+  if (tmpDir) fs.rmSync(tmpDir, { recursive: true, force: true }); // tmpDir is unset if mkdtempSync threw
+});
+
+// ─── Tests ─────────────────────────────────────────────────────────────
+
+describe('EMBEDDING_STRATEGIES', () => {
+  test('exports valid strategies', () => {
+    expect(EMBEDDING_STRATEGIES).toContain('structured');
+    expect(EMBEDDING_STRATEGIES).toContain('source');
+  });
+});
+
+describe('estimateTokens', () => {
+  test('estimates ~4 chars per token', () => {
+    expect(estimateTokens('abcd')).toBe(1); // 4 / 4
+    expect(estimateTokens('abcdefgh')).toBe(2); // 8 / 4
+    expect(estimateTokens('a'.repeat(100))).toBe(25); // 100 / 4
+  });
+
+  test('rounds up', () => {
+    expect(estimateTokens('abcde')).toBe(2); // ceil(5 / 4)
+  });
+
+  test('handles empty string', () => {
+    expect(estimateTokens('')).toBe(0); // ceil(0 / 4)
+  });
+});
+
+describe('MODELS contextWindow', () => {
+  test('every model has a contextWindow', () => {
+    for (const [key, config] of Object.entries(MODELS)) {
+      expect(config.contextWindow, `${key} missing contextWindow`).toBeGreaterThan(0); // 2nd arg: failure message
+    }
+  });
+});
+
+describe('buildEmbeddings with structured strategy', () => {
+  // Build once for the suite so each test below also passes when run on its own (-t / test.only)
+  beforeAll(async () => {
+    EMBEDDED_TEXTS.length = 0;
+    await buildEmbeddings(tmpDir, 'minilm', dbPath, { strategy: 'structured' });
+  });
+
+  test('produces embeddings with graph context', () => {
+    expect(EMBEDDED_TEXTS.length).toBeGreaterThan(0);
+    // square calls multiply → should appear in structured text
+    const squareText = EMBEDDED_TEXTS.find((t) => t.startsWith('function square'));
+    expect(squareText).toBeDefined();
+    expect(squareText).toContain('Calls:');
+    expect(squareText).toContain('multiply');
+    // sumOfSquares calls add and square → should appear
+    const sosText = EMBEDDED_TEXTS.find((t) => t.startsWith('function sumOfSquares'));
+    expect(sosText).toBeDefined();
+    expect(sosText).toContain('Calls:');
+    expect(sosText).toContain('add');
+    expect(sosText).toContain('square');
+    // sumOfSquares is called by compute → should appear
+    expect(sosText).toContain('Called by:');
+    expect(sosText).toContain('compute');
+  });
+
+  test('extracts leading comments', async () => {
+    // add has a JSDoc comment above it: "Add two numbers together."
+    const addText = EMBEDDED_TEXTS.find((t) => t.startsWith('function add'));
+    expect(addText).toBeDefined();
+    expect(addText).toContain('Add two numbers together');
+  });
+
+  test('extracts parameters from signature', async () => {
+    const addText = EMBEDDED_TEXTS.find((t) => t.startsWith('function add'));
+    expect(addText).toBeDefined();
+    expect(addText).toContain('Parameters:');
+    expect(addText).toContain('a, b');
+  });
+
+  test('stores strategy in metadata', async () => {
+    const db = new Database(dbPath, { readonly: true });
+    const row = db.prepare("SELECT value FROM embedding_meta WHERE key = 'strategy'").get();
+    db.close();
+    expect(row.value).toBe('structured');
+  });
+
+  test('structured texts are compact', () => {
+    for (const text of EMBEDDED_TEXTS) {
+      const tokens = estimateTokens(text);
+      expect(tokens).toBeLessThan(200);
+    }
+  });
+});
+
+describe('buildEmbeddings with source strategy', () => {
+  beforeAll(async () => {
+    EMBEDDED_TEXTS.length = 0;
+    await buildEmbeddings(tmpDir, 'minilm', dbPath, { strategy: 'source' });
+  }); // built once per suite so each test also passes when run on its own
+  test('produces embeddings with raw source code', () => {
+    expect(EMBEDDED_TEXTS.length).toBeGreaterThan(0);
+    // Source strategy should NOT have graph context lines
+    const squareText = EMBEDDED_TEXTS.find((t) => t.startsWith('function square'));
+    expect(squareText).toBeDefined();
+    expect(squareText).not.toContain('Calls:');
+    expect(squareText).not.toContain('Called by:');
+    expect(squareText).toContain('return');
+  });
+
+  test('stores strategy in metadata', async () => {
+    const db = new Database(dbPath, { readonly: true });
+    const row = db.prepare("SELECT value FROM embedding_meta WHERE key = 'strategy'").get();
+    db.close();
+    expect(row.value).toBe('source');
+  });
+});
+
+describe('buildEmbeddings defaults to structured', () => {
+  test('no options → structured strategy', async () => {
+    EMBEDDED_TEXTS.length = 0;
+    await buildEmbeddings(tmpDir, 'minilm', dbPath); // 4th (options) argument omitted on purpose
+    // The strategy recorded in embedding_meta shows which default was applied
+    const db = new Database(dbPath, { readonly: true });
+    const row = db.prepare("SELECT value FROM embedding_meta WHERE key = 'strategy'").get();
+    db.close();
+    expect(row.value).toBe('structured');
+  });
+});
+
+describe('context window overflow detection', () => {
+  let bigDir, bigDbPath;
+
+  beforeAll(() => {
+    // Create a file with a very large function that will overflow minilm's 256-token window
+    bigDir = fs.mkdtempSync(path.join(os.tmpdir(), 'codegraph-overflow-test-'));
+    const bigFn =
+      'export function bigFunction(x) {\n' +
+      '  const data = [];\n'.repeat(400) + // 400 × 19 chars = 7600 chars, well above 256 × 4 = 1024
+      '  return data;\n}\n';
+    fs.writeFileSync(path.join(bigDir, 'big.js'), bigFn);
+
+    const bigDbDir = path.join(bigDir, '.codegraph');
+    fs.mkdirSync(bigDbDir, { recursive: true });
+    bigDbPath = path.join(bigDbDir, 'graph.db');
+
+    const db = new Database(bigDbPath);
+    db.pragma('journal_mode = WAL');
+    initSchema(db);
+    insertNode(db, 'bigFunction', 'function', 'big.js', 1, 403); // 1 header + 400 body + 2 closing lines
+    db.close();
+  });
+
+  afterAll(() => {
+    if (bigDir) fs.rmSync(bigDir, { recursive: true, force: true });
+  });
+
+  test('warns and truncates when source text exceeds context window', async () => {
+    // Capture stderr writes; the assertions below presume warn() goes through process.stderr.write
+    const warnSpy = vi.spyOn(process.stderr, 'write').mockImplementation(() => true);
+
+    EMBEDDED_TEXTS.length = 0;
+    await buildEmbeddings(bigDir, 'minilm', bigDbPath, { strategy: 'source' });
+
+    const warnOutput = warnSpy.mock.calls.map((c) => c[0]).join('');
+    warnSpy.mockRestore(); // stderr is restored before any assertion runs
+
+    expect(warnOutput).toContain('exceeded model context window');
+    expect(warnOutput).toContain('truncated');
+
+    // Text should be truncated to fit minilm's 256-token ≈ 1024 char limit
+    const bigText = EMBEDDED_TEXTS.find((t) => t.includes('bigFunction'));
+    expect(bigText).toBeDefined();
+    expect(bigText.length).toBeLessThanOrEqual(256 * 4);
+
+    // Metadata records truncation count
+    const db = new Database(bigDbPath, { readonly: true });
+    const row = db.prepare("SELECT value FROM embedding_meta WHERE key = 'truncated_count'").get();
+    db.close();
+    expect(row).toBeDefined();
+    expect(Number(row.value)).toBeGreaterThan(0);
+  });
+
+  test('structured strategy avoids overflow for same function', async () => {
+    // Same stderr capture as above, this time to prove that no warning is emitted
+    const warnSpy = vi.spyOn(process.stderr, 'write').mockImplementation(() => true);
+
+    EMBEDDED_TEXTS.length = 0;
+    await buildEmbeddings(bigDir, 'minilm', bigDbPath, { strategy: 'structured' });
+
+    const warnOutput = warnSpy.mock.calls.map((c) => c[0]).join('');
+    warnSpy.mockRestore();
+
+    // Structured strategy only uses first few lines + graph context → should NOT overflow
+    const bigText = EMBEDDED_TEXTS.find((t) => t.includes('bigFunction'));
+    expect(bigText).toBeDefined();
+    expect(estimateTokens(bigText)).toBeLessThan(256);
+
+    // No truncation warning expected
+    expect(warnOutput).not.toContain('exceeded model context window');
+  });
+});