Skip to content

Commit 7786fa9

Browse files
Claude claude
authored and committed
chore: bump version to v1.5.0
Add doc indexing (index_docs, search_docs), citation protocol, low-confidence tagging, session duration on resume, and close compliance gates (quick 30min limit, standard recall requirement). Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 6144da8 commit 7786fa9

18 files changed

Lines changed: 1891 additions & 5 deletions

CHANGELOG.md

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,19 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
77

88
## [Unreleased]
99

10+
## [1.5.0] - 2026-05-11
11+
12+
### Added
13+
- **`index_docs` tool**: Scan a directory of markdown files, chunk them, and store them in a local doc index for semantic search. Supports incremental indexing (only re-processes changed files), forced re-indexing, and project-scoped indexes. Alias: `gitmem-idx`.
14+
- **`search_docs` tool**: Search indexed repository documentation using semantic similarity (pro tier) or BM25 keyword search (free tier). Returns relevant chunks with file paths for targeted reading. Alias: `gitmem-sd`.
15+
- **Citation protocol**: `recall`, `search`, and `prepare_context` now include a citation rule instructing agents to cite record IDs when referencing facts from institutional memory.
16+
- **Low confidence tagging**: Recall and search results with similarity below 0.55 are tagged `[low confidence]` — historically, 66% of matches below this threshold were rated N/A.
17+
- **Session duration on resume**: `session_start` now shows elapsed session time and loaded scar count when resuming or refreshing an existing session.
18+
19+
### Changed
20+
- **Quick close hard gate**: `session_close` with `close_type: "quick"` now rejects sessions over 30 minutes, requiring standard close instead.
21+
- **Standard close recall gate**: `session_close` with `close_type: "standard"` now requires at least one `recall()` call during the session (exemptions: quick close, autonomous agents, sessions with inline reflection).
22+
1023
## [1.4.4] - 2026-03-31
1124

1225
### Fixed

package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"name": "gitmem-mcp",
3-
"version": "1.4.4",
3+
"version": "1.5.0",
44
"mcpName": "io.github.gitmem-dev/gitmem",
55
"description": "Persistent learning memory for AI coding agents. Memory that compounds.",
66
"type": "module",

src/hooks/format-utils.ts

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,11 @@ export function formatCompact(
9797
included++;
9898
}
9999

100+
// Citation reminder for sub-agent context (compact — one line)
101+
if (included > 0) {
102+
lines.push("Cite record IDs for any factual claims from these scars.");
103+
}
104+
100105
return { payload: lines.join("\n"), included };
101106
}
102107

src/server.ts

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,10 @@ import { archiveLearning } from "./tools/archive-learning.js";
5353
import type { ArchiveLearningParams } from "./tools/archive-learning.js";
5454
import { contributeFeedback } from "./tools/contribute-feedback.js";
5555
import type { ContributeFeedbackParams } from "./schemas/contribute-feedback.js";
56+
import { indexDocs } from "./tools/index-docs.js";
57+
import type { IndexDocsParams } from "./tools/index-docs.js";
58+
import { searchDocsHandler } from "./tools/search-docs.js";
59+
import type { SearchDocsParams } from "./tools/search-docs.js";
5660
import type { AbsorbObservationsParams, ListThreadsParams, ResolveThreadParams } from "./types/index.js";
5761
import {
5862
getCacheStatus,
@@ -304,6 +308,8 @@ export function createServer(): Server {
304308
{ alias: "gitmem-al", full: "archive_learning", description: "Archive a scar/win/pattern (is_active=false)" },
305309
{ alias: "gitmem-graph", full: "graph_traverse", description: "Traverse knowledge graph over institutional memory" },
306310
{ alias: "gitmem-fb", full: "contribute_feedback", description: "Submit feedback about gitmem (10/session limit)" },
311+
{ alias: "gitmem-idx", full: "index_docs", description: "Index markdown docs for semantic search" },
312+
{ alias: "gitmem-sd", full: "search_docs", description: "Search indexed repository docs" },
307313
];
308314
if (hasBatchOperations()) {
309315
commands.push({ alias: "gitmem-rsb", full: "record_scar_usage_batch", description: "Track multiple scars (batch)" });
@@ -390,6 +396,16 @@ export function createServer(): Server {
390396
case "gm-cache-f":
391397
result = await flushCache((toolArgs.project as Project) || getProject() as Project || "default");
392398
break;
399+
400+
// Doc indexing and search
401+
case "index_docs":
402+
case "gitmem-idx":
403+
result = await indexDocs(toolArgs as unknown as IndexDocsParams);
404+
break;
405+
case "search_docs":
406+
case "gitmem-sd":
407+
result = await searchDocsHandler(toolArgs as unknown as SearchDocsParams);
408+
break;
393409
default:
394410
throw new Error(`Unknown tool: ${name}`);
395411
}

src/services/doc-chunker.ts

Lines changed: 249 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,249 @@
1+
/**
2+
* Document Chunker — Split markdown files into searchable chunks
3+
*
4+
* Strategy:
5+
* 1. Split on H2 headers first (natural semantic boundaries)
6+
* 2. If a section exceeds target size, split on paragraph boundaries
7+
* 3. Each chunk carries metadata: file path, title, category, chunk index
8+
*
9+
* Target chunk size: ~600 tokens (~2400 chars), hard limit ~900 tokens (~3600 chars); pieces under 200 chars are skipped
10+
*/
11+
12+
import * as fs from "fs";
13+
import * as path from "path";
14+
import * as crypto from "crypto";
15+
/** Preferred chunk size in characters (~600 tokens) used when an oversized section is split by paragraphs. */
const TARGET_CHUNK_CHARS = 2400;

/** Largest section, in characters (~900 tokens), that is kept as a single chunk without paragraph splitting. */
const MAX_CHUNK_CHARS = 3600;

/** Sections or sub-chunks shorter than this many characters are not emitted as chunks. */
const MIN_CHUNK_CHARS = 200;
19+
/**
 * One searchable slice of a markdown document, together with the metadata
 * needed to locate it and to detect when its source file has changed.
 */
export interface DocChunk {
  /** Path of the source file, relative to the scan root. */
  file_path: string;
  /** Zero-based position of this chunk within its source file. */
  chunk_index: number;
  /** Document title: the first H1 heading, or a name derived from the file name. */
  title: string;
  /** H2 heading this chunk falls under, or "" when there is none. */
  section_title: string;
  /** First directory segment of the relative path (e.g. "research", "architecture"), or "root". */
  category: string;
  /** The chunk text. */
  content: string;
  /** SHA-256 hex digest of the full source file content (used for change detection). */
  file_hash: string;
}
29+
/**
 * A markdown file discovered by a directory scan, with its content already
 * loaded and hashed.
 */
export interface DocFile {
  /** Full path used to read the file. */
  absolute_path: string;
  /** Path relative to the directory that was scanned. */
  relative_path: string;
  /** File content decoded as UTF-8. */
  content: string;
  /** SHA-256 hex digest of `content`. */
  hash: string;
}
36+
/**
 * Derive a human-readable title for a markdown document.
 *
 * Returns the text of the first level-1 ATX heading (`# Title`) that appears
 * outside a fenced code block. When there is no such heading, falls back to
 * the file name with its extension removed and `-` / `_` replaced by spaces.
 *
 * @param content - Full markdown text of the document.
 * @param filePath - Path of the document; only its base name is used.
 * @returns The heading text, or the prettified file name.
 */
function extractTitle(content: string, filePath: string): string {
  const fencePattern = /^ {0,3}(`{3,}|~{3,})/;
  const h1Pattern = /^#[ \t]+(\S.*)$/;
  let openFence: string | null = null;

  // Work line by line (tolerating CRLF) so a heading can never swallow the
  // following line, and so `# ...` lines inside code fences are ignored.
  for (const line of content.split(/\r?\n/)) {
    const fence = fencePattern.exec(line);
    if (fence) {
      const marker = fence[1].charAt(0);
      if (openFence === null) {
        openFence = marker;
      } else if (openFence === marker) {
        openFence = null;
      }
      continue;
    }
    if (openFence !== null) continue;

    const heading = h1Pattern.exec(line);
    if (heading) return heading[1].trim();
  }

  // Fall back to the file name without its extension (whatever its case).
  return path.basename(filePath, path.extname(filePath)).replace(/[-_]/g, " ");
}
47+
/**
 * Derive a category from a document's relative path.
 *
 * The category is the first directory segment of the path. Both `/` and `\`
 * are accepted as separators, and empty or `.` segments (for example a
 * leading `./`) are ignored.
 *
 * @param relativePath - Path of the document relative to the scan root.
 * @returns The first directory name, or "root" when the file sits directly in the scan root.
 */
function extractCategory(relativePath: string): string {
  const segments = relativePath
    .split(/[\\/]+/)
    .filter((segment) => segment.length > 0 && segment !== ".");

  // The last segment is the file name; anything before it is a directory.
  return segments.length > 1 ? segments[0] : "root";
}
56+
/**
 * Split markdown into sections at level-2 ATX headings (`## Title`).
 *
 * Text before the first heading becomes a section with an empty title. The
 * heading line itself is not part of the section content. Sections whose
 * content is empty after trimming are omitted.
 *
 * Lines are split on LF or CRLF, so files with Windows line endings are
 * sectioned the same way as LF files (content is re-joined with "\n").
 * `##` lines inside fenced code blocks (``` or ~~~) are treated as content,
 * not as headings.
 *
 * @param content - Full markdown text.
 * @returns Sections in document order.
 */
function splitByH2(content: string): Array<{ title: string; content: string }> {
  const fencePattern = /^ {0,3}(`{3,}|~{3,})/;
  const h2Pattern = /^##[ \t]+(\S.*)$/;

  const sections: Array<{ title: string; content: string }> = [];
  let currentTitle = "";
  let currentLines: string[] = [];
  let openFence: string | null = null;

  // Emit the section collected so far, skipping whitespace-only bodies.
  const flush = (): void => {
    const text = currentLines.join("\n").trim();
    if (text.length > 0) {
      sections.push({ title: currentTitle, content: text });
    }
    currentLines = [];
  };

  for (const line of content.split(/\r?\n/)) {
    const fence = fencePattern.exec(line);
    if (fence) {
      // A fence closes only on the same marker character that opened it.
      const marker = fence[1].charAt(0);
      if (openFence === null) {
        openFence = marker;
      } else if (openFence === marker) {
        openFence = null;
      }
      currentLines.push(line);
      continue;
    }

    const heading = openFence === null ? h2Pattern.exec(line) : null;
    if (heading) {
      flush();
      currentTitle = heading[1].trim();
    } else {
      currentLines.push(line);
    }
  }

  // Don't forget the last section.
  flush();

  return sections;
}
93+
/**
 * Break one oversized paragraph into pieces of at most `limit` characters,
 * preferring line boundaries and slicing only lines that are themselves too long.
 */
function splitOversizedParagraph(paragraph: string, limit: number): string[] {
  const pieces: string[] = [];
  let current = "";

  for (const line of paragraph.split("\n")) {
    let rest = line;

    // A single line longer than the limit has no boundary left to split on.
    while (rest.length > limit) {
      if (current.length > 0) {
        pieces.push(current);
        current = "";
      }
      let cut = limit;
      const lastUnit = rest.charCodeAt(cut - 1);
      // Avoid cutting between the two halves of a surrogate pair.
      if (cut > 1 && lastUnit >= 0xd800 && lastUnit <= 0xdbff) cut -= 1;
      pieces.push(rest.slice(0, cut));
      rest = rest.slice(cut);
    }

    if (current.length > 0 && current.length + rest.length + 1 > limit) {
      pieces.push(current);
      current = rest;
    } else {
      current = current.length > 0 ? current + "\n" + rest : rest;
    }
  }

  if (current.length > 0) pieces.push(current);
  return pieces;
}

/**
 * Split a text blob on paragraph boundaries (blank lines) so chunks stay
 * near a target size.
 *
 * Text that already fits in `maxChars` is returned unchanged as a single
 * chunk. Otherwise paragraphs are packed greedily: a paragraph is appended
 * to the current chunk unless that would push it past `maxChars`. A single
 * paragraph may exceed `maxChars`, but one longer than `hardMaxChars` (for
 * example a large table or code block with no blank lines) is broken on
 * line boundaries so that no returned chunk exceeds `hardMaxChars`.
 *
 * @param text - Text to split.
 * @param maxChars - Target maximum chunk length in characters.
 * @param hardMaxChars - Absolute maximum chunk length; defaults to 1.5 × `maxChars`.
 * @returns Trimmed, non-empty chunks in document order.
 */
function splitByParagraphs(
  text: string,
  maxChars: number,
  hardMaxChars: number = Math.floor(maxChars * 1.5)
): string[] {
  if (text.length <= maxChars) return [text];

  // Guard against non-positive limits, which would otherwise never make progress.
  const limit = Math.max(1, Math.floor(maxChars));
  const hardLimit = Math.max(limit, Math.floor(hardMaxChars));

  const chunks: string[] = [];
  let current = "";

  const flush = (): void => {
    const trimmed = current.trim();
    if (trimmed.length > 0) chunks.push(trimmed);
    current = "";
  };

  for (const para of text.split(/\n\n+/)) {
    if (para.length > hardLimit) {
      flush();
      const pieces = splitOversizedParagraph(para, limit);
      // Keep the last piece open so following paragraphs can join it.
      const tail = pieces.pop() ?? "";
      for (const piece of pieces) {
        const trimmed = piece.trim();
        if (trimmed.length > 0) chunks.push(trimmed);
      }
      current = tail;
      continue;
    }

    if (current.length > 0 && current.length + para.length + 2 > limit) {
      flush();
      current = para;
    } else {
      current = current ? current + "\n\n" + para : para;
    }
  }

  flush();
  return chunks;
}
119+
/**
 * Compute the SHA-256 digest of a string.
 *
 * @param content - Text to hash; it is encoded as UTF-8 before hashing.
 * @returns The digest as a lower-case hexadecimal string.
 */
function hashContent(content: string): string {
  return crypto.createHash("sha256").update(content, "utf8").digest("hex");
}
126+
/**
 * Chunk a single markdown file into searchable segments.
 *
 * The document is split into H2 sections. A section no longer than
 * MAX_CHUNK_CHARS becomes one chunk; a longer one is split on paragraph
 * boundaries aiming for TARGET_CHUNK_CHARS. Pieces shorter than
 * MIN_CHUNK_CHARS are not emitted.
 *
 * If that produces no chunks at all (every section was below the minimum)
 * but the document as a whole reaches MIN_CHUNK_CHARS, the whole trimmed
 * document is chunked instead, with an empty section title. The document is
 * split by paragraphs when it is larger than MAX_CHUNK_CHARS, so no content
 * is truncated.
 *
 * @param doc - The file to chunk; `content`, `relative_path` and `hash` are read.
 * @returns Chunks in document order, with `chunk_index` counting up from 0.
 */
export function chunkDocument(doc: DocFile): DocChunk[] {
  const title = extractTitle(doc.content, doc.relative_path);
  const category = extractCategory(doc.relative_path);
  const chunks: DocChunk[] = [];

  // Single place where chunks are built, so the index always matches the position.
  const pushChunk = (sectionTitle: string, content: string): void => {
    chunks.push({
      file_path: doc.relative_path,
      chunk_index: chunks.length,
      title,
      section_title: sectionTitle,
      category,
      content,
      file_hash: doc.hash,
    });
  };

  for (const section of splitByH2(doc.content)) {
    const pieces =
      section.content.length <= MAX_CHUNK_CHARS
        ? [section.content]
        : splitByParagraphs(section.content, TARGET_CHUNK_CHARS);

    for (const piece of pieces) {
      // NOTE(review): pieces under MIN_CHUNK_CHARS are dropped, so short
      // sections are not searchable unless the fallback below runs. Confirm
      // this is intended rather than merging them into a neighbouring chunk.
      if (piece.length >= MIN_CHUNK_CHARS) {
        pushChunk(section.title, piece);
      }
    }
  }

  // Fallback: every section was too small on its own, but the document as a
  // whole is worth indexing. Chunk the whole document rather than truncating it.
  if (chunks.length === 0) {
    const whole = doc.content.trim();
    if (whole.length >= MIN_CHUNK_CHARS) {
      const pieces =
        whole.length <= MAX_CHUNK_CHARS
          ? [whole]
          : splitByParagraphs(whole, TARGET_CHUNK_CHARS);
      for (const piece of pieces) {
        if (piece.length > 0) pushChunk("", piece);
      }
    }
  }

  return chunks;
}
187+
/**
 * Recursively scan a directory for markdown files and load them.
 *
 * Files whose name ends in `.md` (in any letter case) are read as UTF-8 and
 * hashed. Directories whose name appears in `exclude` are not descended
 * into. Entries are visited in name order so the result is the same on
 * every run regardless of the order the filesystem returns them in.
 *
 * Scanning is best-effort: directories that cannot be listed and files that
 * cannot be read are skipped without raising.
 *
 * @param dirPath - Root directory to scan; `relative_path` values are relative to it.
 * @param options - `exclude`: directory names to skip (defaults to `_archive`, `node_modules`, `.git`).
 * @returns The loaded markdown files.
 */
export function scanDirectory(
  dirPath: string,
  options: { exclude?: string[] } = {}
): DocFile[] {
  const exclude = options.exclude ?? ["_archive", "node_modules", ".git"];
  const files: DocFile[] = [];

  function walk(currentPath: string): void {
    let entries: fs.Dirent[];
    try {
      entries = fs.readdirSync(currentPath, { withFileTypes: true });
    } catch {
      return; // Permission denied or inaccessible
    }

    // Code-unit ordering: deterministic and independent of the host locale.
    entries.sort((a, b) => (a.name < b.name ? -1 : a.name > b.name ? 1 : 0));

    for (const entry of entries) {
      const fullPath = path.join(currentPath, entry.name);

      if (entry.isDirectory()) {
        if (!exclude.includes(entry.name)) {
          walk(fullPath);
        }
      } else if (entry.isFile() && entry.name.toLowerCase().endsWith(".md")) {
        try {
          const content = fs.readFileSync(fullPath, "utf-8");
          files.push({
            absolute_path: fullPath,
            relative_path: path.relative(dirPath, fullPath),
            content,
            hash: hashContent(content),
          });
        } catch {
          // Skip unreadable files
        }
      }
    }
  }

  walk(dirPath);
  return files;
}
233+
/**
 * Scan a directory for markdown files and chunk every file found.
 *
 * @param dirPath - Root directory to scan.
 * @param options - Passed through to `scanDirectory` (`exclude`: directory names to skip).
 * @returns The scanned files, and the chunks of all files concatenated in file order.
 */
export function chunkDirectory(
  dirPath: string,
  options: { exclude?: string[] } = {}
): { files: DocFile[]; chunks: DocChunk[] } {
  const files = scanDirectory(dirPath, options);
  const chunks = files.flatMap((file) => chunkDocument(file));

  return { files, chunks };
}

0 commit comments

Comments
 (0)