gitmem-dev
diff --git a/‎CHANGELOG.md‎
Lines changed: 13 additions & 0 deletions b/‎CHANGELOG.md‎
Lines changed: 13 additions & 0 deletions
diff --git a/‎package.json‎
Lines changed: 1 addition & 1 deletion b/‎package.json‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎src/hooks/format-utils.ts‎
Lines changed: 5 additions & 0 deletions b/‎src/hooks/format-utils.ts‎
Lines changed: 5 additions & 0 deletions
diff --git a/‎src/server.ts‎
Lines changed: 16 additions & 0 deletions b/‎src/server.ts‎
Lines changed: 16 additions & 0 deletions
diff --git a/‎src/services/doc-chunker.ts‎
Lines changed: 249 additions & 0 deletions b/‎src/services/doc-chunker.ts‎
Lines changed: 249 additions & 0 deletions
@@ -7,6 +7,19 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## [Unreleased]
 
+## [1.5.0] - 2026-05-11
+
+### Added
+- **`index_docs` tool**: Scan a directory of markdown files, chunk them, and store in a local doc index for semantic search. Supports incremental indexing (only re-processes changed files), force re-index, and project-scoped indexes. Alias: `gitmem-idx`.
+- **`search_docs` tool**: Search indexed repository documentation using semantic similarity (pro tier) or BM25 keyword search (free tier). Returns relevant chunks with file paths for targeted reading. Alias: `gitmem-sd`.
+- **Citation protocol**: `recall`, `search`, and `prepare_context` now include a citation rule instructing agents to cite record IDs when referencing facts from institutional memory.
+- **Low confidence tagging**: Recall and search results with similarity below 0.55 are tagged `[low confidence]` — these matches have a 66% N/A rate historically.
+- **Session duration on resume**: `session_start` now shows elapsed session time and loaded scar count when resuming or refreshing an existing session.
+
+### Changed
+- **Quick close hard gate**: `session_close` with `close_type: "quick"` now rejects sessions over 30 minutes, requiring standard close instead.
+- **Standard close recall gate**: `session_close` with `close_type: "standard"` now requires at least one `recall()` call during the session (exemptions: quick close, autonomous agents, sessions with inline reflection).
+
 ## [1.4.4] - 2026-03-31
 
 ### Fixed
 
@@ -1,6 +1,6 @@
 {
   "name": "gitmem-mcp",
-  "version": "1.4.4",
+  "version": "1.5.0",
   "mcpName": "io.github.gitmem-dev/gitmem",
   "description": "Persistent learning memory for AI coding agents. Memory that compounds.",
   "type": "module",
 
@@ -97,6 +97,11 @@ export function formatCompact(
     included++;
   }
 
+  // Citation reminder for sub-agent context (compact — one line)
+  if (included > 0) {
+    lines.push("Cite record IDs for any factual claims from these scars.");
+  }
+
   return { payload: lines.join("\n"), included };
 }
 
 
@@ -53,6 +53,10 @@ import { archiveLearning } from "./tools/archive-learning.js";
 import type { ArchiveLearningParams } from "./tools/archive-learning.js";
 import { contributeFeedback } from "./tools/contribute-feedback.js";
 import type { ContributeFeedbackParams } from "./schemas/contribute-feedback.js";
+import { indexDocs } from "./tools/index-docs.js";
+import type { IndexDocsParams } from "./tools/index-docs.js";
+import { searchDocsHandler } from "./tools/search-docs.js";
+import type { SearchDocsParams } from "./tools/search-docs.js";
 import type { AbsorbObservationsParams, ListThreadsParams, ResolveThreadParams } from "./types/index.js";
 import {
   getCacheStatus,
@@ -304,6 +308,8 @@ export function createServer(): Server {
             { alias: "gitmem-al", full: "archive_learning", description: "Archive a scar/win/pattern (is_active=false)" },
             { alias: "gitmem-graph", full: "graph_traverse", description: "Traverse knowledge graph over institutional memory" },
             { alias: "gitmem-fb", full: "contribute_feedback", description: "Submit feedback about gitmem (10/session limit)" },
+            { alias: "gitmem-idx", full: "index_docs", description: "Index markdown docs for semantic search" },
+            { alias: "gitmem-sd", full: "search_docs", description: "Search indexed repository docs" },
           ];
           if (hasBatchOperations()) {
             commands.push({ alias: "gitmem-rsb", full: "record_scar_usage_batch", description: "Track multiple scars (batch)" });
@@ -390,6 +396,16 @@ export function createServer(): Server {
         case "gm-cache-f":
           result = await flushCache((toolArgs.project as Project) || getProject() as Project || "default");
           break;
+
+        // Doc indexing and search
+        case "index_docs":
+        case "gitmem-idx":
+          result = await indexDocs(toolArgs as unknown as IndexDocsParams);
+          break;
+        case "search_docs":
+        case "gitmem-sd":
+          result = await searchDocsHandler(toolArgs as unknown as SearchDocsParams);
+          break;
         default:
           throw new Error(`Unknown tool: ${name}`);
       }
 
@@ -0,0 +1,249 @@
+/**
+ * Document Chunker — Split markdown files into searchable chunks
+ *
+ * Strategy:
+ * 1. Split on H2 headers first (natural semantic boundaries)
+ * 2. If a section exceeds target size, split on paragraph boundaries
+ * 3. Each chunk carries metadata: file path, title, category, chunk index
+ *
+ * Target chunk size: ~600 tokens (~2400 chars); hard limit ~900 tokens (~3600 chars)
+ */
+
+import * as fs from "fs";
+import * as path from "path";
+import * as crypto from "crypto";
+
+const TARGET_CHUNK_CHARS = 2400; // ~600 tokens; size budget handed to splitByParagraphs when a section is too large
+const MAX_CHUNK_CHARS = 3600; // ~900 tokens hard limit; sections at or below this length stay a single chunk
+const MIN_CHUNK_CHARS = 200; // Don't create tiny chunks: segments shorter than this are not emitted
+
+export interface DocChunk {
+  file_path: string; // Relative path from scan root (copied from DocFile.relative_path)
+  chunk_index: number; // 0-based position of this chunk within its file
+  title: string; // First H1 in the file, or the filename without ".md" ("-"/"_" become spaces)
+  section_title: string; // H2 header for this chunk ("" for text before the first H2)
+  category: string; // First segment of file_path (e.g., "research", "architecture"), or "root"
+  content: string; // The chunk text
+  file_hash: string; // Copied from DocFile.hash: SHA-256 of full file content (for change detection)
+}
+
+export interface DocFile {
+  absolute_path: string; // Scan root joined with the entry's path (scanDirectory uses path.join, so it is absolute only if the root was)
+  relative_path: string; // Path relative to the scan root, in the platform's separator style
+  content: string; // Full file text, read as UTF-8
+  hash: string; // SHA-256 hex digest of `content`
+}
+
+/**
+ * Derive a display title for a markdown document.
+ *
+ * Scans the document line by line and returns the text of the first
+ * `# Heading` that sits outside a fenced code block, so shell comments such
+ * as `# install deps` inside a code fence are never mistaken for the title.
+ * The heading text must be on the same line as the `#`; a bare `#` line does
+ * not swallow the following line.
+ *
+ * @param content - Full markdown text of the file (LF or CRLF line endings).
+ * @param filePath - Path whose basename is used when no H1 is found.
+ * @returns The trimmed H1 text, or the filename without `.md` with `-` and
+ *   `_` replaced by spaces.
+ */
+function extractTitle(content: string, filePath: string): string {
+  // Fence character (` or ~) of the code block we are inside, if any.
+  let openFence: string | null = null;
+
+  for (const line of content.split(/\r?\n/)) {
+    const fence = line.match(/^ {0,3}(`{3,}|~{3,})/);
+    if (fence) {
+      const marker = fence[1].charAt(0);
+      if (openFence === null) {
+        openFence = marker;
+      } else if (openFence === marker) {
+        openFence = null;
+      }
+      continue;
+    }
+    if (openFence !== null) continue;
+
+    // [ \t]+ rather than \s+ keeps the match on a single line.
+    const h1 = line.match(/^#[ \t]+(.+)$/);
+    if (h1) {
+      const heading = h1[1].trim();
+      if (heading.length > 0) return heading;
+    }
+  }
+
+  // Fall back to filename without extension
+  return path.basename(filePath, ".md").replace(/[-_]/g, " ");
+}
+
+/**
+ * Category of a document: its top-level directory under the scan root.
+ *
+ * @param relativePath - Path relative to the scan root, using `path.sep`.
+ * @returns Everything before the first separator, or "root" when the path
+ *   contains no separator (the file lives directly in the scan root).
+ */
+function extractCategory(relativePath: string): string {
+  const firstSeparator = relativePath.indexOf(path.sep);
+  return firstSeparator === -1 ? "root" : relativePath.slice(0, firstSeparator);
+}
+
+/**
+ * Split markdown into sections by H2 headers.
+ *
+ * Each `## Heading` line starts a new section; the heading text becomes the
+ * section title and the heading line itself is not part of the content. Text
+ * before the first H2 is returned with an empty title. Sections whose content
+ * is empty after trimming are omitted.
+ *
+ * Lines are split on LF or CRLF, so Windows-encoded files are sectioned too
+ * (content is re-joined with "\n"). `## ` lines inside fenced code blocks are
+ * treated as ordinary content rather than headers.
+ *
+ * @param content - Full markdown text.
+ * @returns Sections in document order.
+ */
+function splitByH2(content: string): Array<{ title: string; content: string }> {
+  const sections: Array<{ title: string; content: string }> = [];
+  let currentTitle = "";
+  let currentLines: string[] = [];
+  // Fence character (` or ~) of the code block we are inside, if any.
+  let openFence: string | null = null;
+
+  // Close the section being accumulated, keeping it only if it has text.
+  const flush = (): void => {
+    const text = currentLines.join("\n").trim();
+    if (text.length > 0) {
+      sections.push({ title: currentTitle, content: text });
+    }
+    currentLines = [];
+  };
+
+  for (const line of content.split(/\r?\n/)) {
+    const fence = line.match(/^ {0,3}(`{3,}|~{3,})/);
+    if (fence) {
+      const marker = fence[1].charAt(0);
+      if (openFence === null) {
+        openFence = marker;
+      } else if (openFence === marker) {
+        openFence = null;
+      }
+    }
+
+    const h2Match =
+      fence === null && openFence === null ? line.match(/^##\s+(.+)$/) : null;
+    if (h2Match) {
+      flush();
+      currentTitle = h2Match[1].trim();
+    } else {
+      currentLines.push(line);
+    }
+  }
+
+  // Don't forget the last section
+  flush();
+
+  return sections;
+}
+
+/**
+ * Break one paragraph that is longer than `limit` into pieces of at most
+ * `limit` characters, cutting at the last whitespace inside the window when
+ * there is one and mid-token otherwise. Paragraphs within the limit are
+ * returned unchanged as a single piece.
+ */
+function splitOversizedParagraph(para: string, limit: number): string[] {
+  if (para.length <= limit) return [para];
+
+  const pieces: string[] = [];
+  let rest = para;
+  while (rest.length > limit) {
+    const window = rest.slice(0, limit + 1);
+    let cut = Math.max(window.lastIndexOf("\n"), window.lastIndexOf(" "));
+    if (cut <= 0) cut = limit; // no usable whitespace: hard cut
+    pieces.push(rest.slice(0, cut).trimEnd());
+    rest = rest.slice(cut).trimStart();
+  }
+  if (rest.length > 0) pieces.push(rest);
+  return pieces;
+}
+
+/**
+ * Split a text blob on paragraph boundaries to fit within target size.
+ *
+ * Paragraphs (separated by one or more blank lines, LF or CRLF) are packed
+ * greedily: a paragraph is appended to the current chunk, joined by a blank
+ * line, unless that would push the chunk past `maxChars`, in which case the
+ * current chunk is closed and a new one started. A single paragraph longer
+ * than `maxChars` is itself broken up, so no returned chunk exceeds the
+ * limit.
+ *
+ * @param text - Text to split.
+ * @param maxChars - Maximum length of each returned chunk.
+ * @returns Trimmed, non-empty chunks in order; `[text]` unchanged when the
+ *   input already fits.
+ */
+function splitByParagraphs(text: string, maxChars: number): string[] {
+  if (text.length <= maxChars) return [text];
+
+  // Guard against a zero/negative limit so the hard split always progresses.
+  const limit = Math.max(1, Math.floor(maxChars));
+  const chunks: string[] = [];
+  let current = "";
+
+  for (const para of text.split(/(?:\r?\n){2,}/)) {
+    for (const piece of splitOversizedParagraph(para, limit)) {
+      if (current.length + piece.length + 2 > maxChars && current.length > 0) {
+        chunks.push(current.trim());
+        current = piece;
+      } else {
+        current = current ? current + "\n\n" + piece : piece;
+      }
+    }
+  }
+
+  if (current.trim().length > 0) {
+    chunks.push(current.trim());
+  }
+
+  return chunks;
+}
+
+/**
+ * SHA-256 digest of a string.
+ *
+ * @param content - Text to hash.
+ * @returns The digest as a lowercase hex string.
+ */
+function hashContent(content: string): string {
+  const hasher = crypto.createHash("sha256");
+  hasher.update(content);
+  return hasher.digest("hex");
+}
+
+/**
+ * Chunk a single markdown file into searchable segments.
+ *
+ * The file is split into H2 sections. A section of at most MAX_CHUNK_CHARS
+ * becomes one chunk; a larger one is split on paragraph boundaries using
+ * TARGET_CHUNK_CHARS as the budget. Segments shorter than MIN_CHUNK_CHARS are
+ * skipped.
+ *
+ * If that yields nothing (every section was below the minimum) but the file
+ * as a whole reaches MIN_CHUNK_CHARS, the whole trimmed file is indexed
+ * instead: as one chunk when it fits in MAX_CHUNK_CHARS, otherwise split on
+ * paragraph boundaries so that no content is discarded.
+ *
+ * @param doc - File to chunk; `relative_path` and `hash` are copied onto
+ *   every chunk.
+ * @returns Chunks in document order with consecutive `chunk_index` values
+ *   starting at 0; empty when the file has too little content.
+ */
+export function chunkDocument(doc: DocFile): DocChunk[] {
+  const title = extractTitle(doc.content, doc.relative_path);
+  const category = extractCategory(doc.relative_path);
+  const chunks: DocChunk[] = [];
+
+  // chunk_index is the number of chunks emitted so far for this file.
+  const emit = (sectionTitle: string, content: string): void => {
+    chunks.push({
+      file_path: doc.relative_path,
+      chunk_index: chunks.length,
+      title,
+      section_title: sectionTitle,
+      category,
+      content,
+      file_hash: doc.hash,
+    });
+  };
+
+  // Split by H2 headers
+  for (const section of splitByH2(doc.content)) {
+    const pieces =
+      section.content.length <= MAX_CHUNK_CHARS
+        ? [section.content]
+        : splitByParagraphs(section.content, TARGET_CHUNK_CHARS);
+
+    for (const piece of pieces) {
+      if (piece.length >= MIN_CHUNK_CHARS) {
+        emit(section.title, piece);
+      }
+    }
+  }
+
+  // Fallback: every section was too small to index on its own. Index the
+  // file as a whole rather than dropping it, splitting (not truncating) when
+  // it is longer than one chunk.
+  if (chunks.length === 0) {
+    const whole = doc.content.trim();
+    if (whole.length >= MIN_CHUNK_CHARS) {
+      const pieces =
+        whole.length <= MAX_CHUNK_CHARS
+          ? [whole]
+          : splitByParagraphs(whole, TARGET_CHUNK_CHARS);
+
+      for (const piece of pieces) {
+        if (piece.length > 0) {
+          emit("", piece);
+        }
+      }
+    }
+  }
+
+  return chunks;
+}
+
+/**
+ * Recursively collect the markdown files under a directory.
+ *
+ * Regular files whose name ends in ".md" are read as UTF-8 and hashed.
+ * Directories whose name appears in the exclude list are not entered.
+ * Directories that cannot be listed and files that cannot be read are
+ * skipped without raising.
+ *
+ * @param dirPath - Root directory of the scan; `relative_path` values are
+ *   computed against it.
+ * @param options - `exclude`: directory names to skip (defaults to
+ *   "_archive", "node_modules" and ".git").
+ * @returns One DocFile per readable markdown file, in traversal order.
+ */
+export function scanDirectory(
+  dirPath: string,
+  options: { exclude?: string[] } = {}
+): DocFile[] {
+  const skippedDirs = options.exclude || ["_archive", "node_modules", ".git"];
+  const found: DocFile[] = [];
+
+  // List a directory, or null when it cannot be read (e.g. permission denied).
+  const listEntries = (dir: string): fs.Dirent[] | null => {
+    try {
+      return fs.readdirSync(dir, { withFileTypes: true });
+    } catch {
+      return null;
+    }
+  };
+
+  const visit = (dir: string): void => {
+    const entries = listEntries(dir);
+    if (entries === null) return;
+
+    for (const entry of entries) {
+      const entryPath = path.join(dir, entry.name);
+
+      if (entry.isDirectory()) {
+        if (skippedDirs.includes(entry.name)) continue;
+        visit(entryPath);
+        continue;
+      }
+
+      if (!entry.isFile() || !entry.name.endsWith(".md")) continue;
+
+      try {
+        const content = fs.readFileSync(entryPath, "utf-8");
+        const relativePath = path.relative(dirPath, entryPath);
+        found.push({
+          absolute_path: entryPath,
+          relative_path: relativePath,
+          content,
+          hash: hashContent(content),
+        });
+      } catch {
+        // Unreadable file: leave it out of the result
+      }
+    }
+  };
+
+  visit(dirPath);
+  return found;
+}
+
+/**
+ * Scan a directory for markdown files and chunk every file found.
+ *
+ * @param dirPath - Root directory to scan.
+ * @param options - Passed through to scanDirectory (`exclude` list).
+ * @returns The scanned files together with all of their chunks, with chunks
+ *   grouped by file in scan order.
+ */
+export function chunkDirectory(
+  dirPath: string,
+  options: { exclude?: string[] } = {}
+): { files: DocFile[]; chunks: DocChunk[] } {
+  const files = scanDirectory(dirPath, options);
+  const chunks: DocChunk[] = files.flatMap((file) => chunkDocument(file));
+  return { files, chunks };
+}
Original file line number	Diff line number	Diff line change
`@@ -1,6 +1,6 @@`
`1`	`1`	`{`
`2`	`2`	`"name": "gitmem-mcp",`
`3`		`- "version": "1.4.4",`
	`3`	`+ "version": "1.5.0",`
`4`	`4`	`"mcpName": "io.github.gitmem-dev/gitmem",`
`5`	`5`	`"description": "Persistent learning memory for AI coding agents. Memory that compounds.",`
`6`	`6`	`"type": "module",`
Original file line number	Diff line number	Diff line change
`@@ -97,6 +97,11 @@ export function formatCompact(`
`97`	`97`	`included++;`
`98`	`98`	`}`
`99`	`99`
	`100`	`+ // Citation reminder for sub-agent context (compact — one line)`
	`101`	`+ if (included > 0) {`
	`102`	`+ lines.push("Cite record IDs for any factual claims from these scars.");`
	`103`	`+ }`
	`104`	`+`
`100`	`105`	`return { payload: lines.join("\n"), included };`
`101`	`106`	`}`
`102`	`107`